In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import researchpy as rp
In [3]:
df = pd.read_csv('dataset.csv')
In [5]:
df.head()
Out[5]:
loan_id no_of_dependents education self_employed income_annum loan_amount loan_term cibil_score residential_assets_value commercial_assets_value luxury_assets_value bank_asset_value loan_status
0 1 2 Graduate No 9600000 29900000 12 778 2400000 17600000 22700000 8000000 Approved
1 2 0 Not Graduate Yes 4100000 12200000 8 417 2700000 2200000 8800000 3300000 Rejected
2 3 3 Graduate No 9100000 29700000 20 506 7100000 4500000 33300000 12800000 Rejected
3 4 3 Graduate No 8200000 30700000 8 467 18200000 3300000 23300000 7900000 Rejected
4 5 5 Not Graduate Yes 9800000 24200000 20 382 12400000 8200000 29400000 5000000 Rejected
In [7]:
df.tail()
Out[7]:
loan_id no_of_dependents education self_employed income_annum loan_amount loan_term cibil_score residential_assets_value commercial_assets_value luxury_assets_value bank_asset_value loan_status
4264 4265 5 Graduate Yes 1000000 2300000 12 317 2800000 500000 3300000 800000 Rejected
4265 4266 0 Not Graduate Yes 3300000 11300000 20 559 4200000 2900000 11000000 1900000 Approved
4266 4267 2 Not Graduate No 6500000 23900000 18 457 1200000 12400000 18100000 7300000 Rejected
4267 4268 1 Not Graduate No 4100000 12800000 8 780 8200000 700000 14100000 5800000 Approved
4268 4269 1 Graduate No 9200000 29700000 10 607 17800000 11800000 35700000 12000000 Approved
In [11]:
df.shape
Out[11]:
(4269, 13)
In [6]:
df.describe()
Out[6]:
loan_id no_of_dependents income_annum loan_amount loan_term cibil_score residential_assets_value commercial_assets_value luxury_assets_value bank_asset_value
count 4269.000000 4269.000000 4.269000e+03 4.269000e+03 4269.000000 4269.000000 4.269000e+03 4.269000e+03 4.269000e+03 4.269000e+03
mean 2135.000000 2.498712 5.059124e+06 1.513345e+07 10.900445 599.936051 7.472617e+06 4.973155e+06 1.512631e+07 4.976692e+06
std 1232.498479 1.695910 2.806840e+06 9.043363e+06 5.709187 172.430401 6.503637e+06 4.388966e+06 9.103754e+06 3.250185e+06
min 1.000000 0.000000 2.000000e+05 3.000000e+05 2.000000 300.000000 -1.000000e+05 0.000000e+00 3.000000e+05 0.000000e+00
25% 1068.000000 1.000000 2.700000e+06 7.700000e+06 6.000000 453.000000 2.200000e+06 1.300000e+06 7.500000e+06 2.300000e+06
50% 2135.000000 3.000000 5.100000e+06 1.450000e+07 10.000000 600.000000 5.600000e+06 3.700000e+06 1.460000e+07 4.600000e+06
75% 3202.000000 4.000000 7.500000e+06 2.150000e+07 16.000000 748.000000 1.130000e+07 7.600000e+06 2.170000e+07 7.100000e+06
max 4269.000000 5.000000 9.900000e+06 3.950000e+07 20.000000 900.000000 2.910000e+07 1.940000e+07 3.920000e+07 1.470000e+07
In [7]:
df.describe().T
Out[7]:
count mean std min 25% 50% 75% max
loan_id 4269.0 2.135000e+03 1.232498e+03 1.0 1068.0 2135.0 3202.0 4269.0
no_of_dependents 4269.0 2.498712e+00 1.695910e+00 0.0 1.0 3.0 4.0 5.0
income_annum 4269.0 5.059124e+06 2.806840e+06 200000.0 2700000.0 5100000.0 7500000.0 9900000.0
loan_amount 4269.0 1.513345e+07 9.043363e+06 300000.0 7700000.0 14500000.0 21500000.0 39500000.0
loan_term 4269.0 1.090045e+01 5.709187e+00 2.0 6.0 10.0 16.0 20.0
cibil_score 4269.0 5.999361e+02 1.724304e+02 300.0 453.0 600.0 748.0 900.0
residential_assets_value 4269.0 7.472617e+06 6.503637e+06 -100000.0 2200000.0 5600000.0 11300000.0 29100000.0
commercial_assets_value 4269.0 4.973155e+06 4.388966e+06 0.0 1300000.0 3700000.0 7600000.0 19400000.0
luxury_assets_value 4269.0 1.512631e+07 9.103754e+06 300000.0 7500000.0 14600000.0 21700000.0 39200000.0
bank_asset_value 4269.0 4.976692e+06 3.250185e+06 0.0 2300000.0 4600000.0 7100000.0 14700000.0
In [8]:
df.isnull().sum()
Out[8]:
loan_id                      0
 no_of_dependents            0
 education                   0
 self_employed               0
 income_annum                0
 loan_amount                 0
 loan_term                   0
 cibil_score                 0
 residential_assets_value    0
 commercial_assets_value     0
 luxury_assets_value         0
 bank_asset_value            0
 loan_status                 0
dtype: int64
In [9]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4269 entries, 0 to 4268
Data columns (total 13 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   loan_id                    4269 non-null   int64 
 1    no_of_dependents          4269 non-null   int64 
 2    education                 4269 non-null   object
 3    self_employed             4269 non-null   object
 4    income_annum              4269 non-null   int64 
 5    loan_amount               4269 non-null   int64 
 6    loan_term                 4269 non-null   int64 
 7    cibil_score               4269 non-null   int64 
 8    residential_assets_value  4269 non-null   int64 
 9    commercial_assets_value   4269 non-null   int64 
 10   luxury_assets_value       4269 non-null   int64 
 11   bank_asset_value          4269 non-null   int64 
 12   loan_status               4269 non-null   object
dtypes: int64(10), object(3)
memory usage: 433.7+ KB
In [9]:
# object türündeki sütunları category türüne çevirelim
object_columns = df.select_dtypes(include='object').columns
df[object_columns] = df[object_columns].astype('category')
In [11]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4269 entries, 0 to 4268
Data columns (total 13 columns):
 #   Column                     Non-Null Count  Dtype   
---  ------                     --------------  -----   
 0   loan_id                    4269 non-null   int64   
 1    no_of_dependents          4269 non-null   int64   
 2    education                 4269 non-null   category
 3    self_employed             4269 non-null   category
 4    income_annum              4269 non-null   int64   
 5    loan_amount               4269 non-null   int64   
 6    loan_term                 4269 non-null   int64   
 7    cibil_score               4269 non-null   int64   
 8    residential_assets_value  4269 non-null   int64   
 9    commercial_assets_value   4269 non-null   int64   
 10   luxury_assets_value       4269 non-null   int64   
 11   bank_asset_value          4269 non-null   int64   
 12   loan_status               4269 non-null   category
dtypes: category(3), int64(10)
memory usage: 346.5 KB
In [12]:
for column in df.columns:
    unique_values = df[column].unique()
    print(f"Unique values in column '{column}':")
    print(unique_values)
    print()
Unique values in column 'loan_id':
[   1    2    3 ... 4267 4268 4269]

Unique values in column ' no_of_dependents':
[2 0 3 5 4 1]

Unique values in column ' education':
[' Graduate', ' Not Graduate']
Categories (2, object): [' Graduate', ' Not Graduate']

Unique values in column ' self_employed':
[' No', ' Yes']
Categories (2, object): [' No', ' Yes']

Unique values in column ' income_annum':
[9600000 4100000 9100000 8200000 9800000 4800000 8700000 5700000  800000
 1100000 2900000 6700000 5000000 1900000 4700000  500000 2700000 6300000
 5800000 6500000 4900000 3100000 2400000 7000000 9000000 8400000 1700000
 1600000 8000000 3600000 1500000 7800000 1400000 4200000 5500000 9500000
 7300000 3800000 5100000 4300000 9300000 7400000 8500000 8800000 3300000
 3900000 8300000 5600000 5300000 2600000  700000 3500000 9900000 3000000
 6800000 2000000 1000000  300000 6600000 9400000 4400000  400000 6200000
 9700000 7100000  600000 7200000  900000  200000 1800000 4600000 2200000
 2500000 8600000 4000000 5200000 8900000 1300000 4500000 8100000 9200000
 2800000 7500000 6400000 6900000 7700000 3200000 7900000 5900000 3400000
 2100000 3700000 5400000 2300000 7600000 6000000 6100000 1200000]

Unique values in column ' loan_amount':
[29900000 12200000 29700000 30700000 24200000 13500000 33000000 15000000
  2200000  4300000 11200000 22700000 11600000 31500000  7400000 10700000
  1600000  9400000 10300000 14600000 19400000 14000000 25700000  1400000
  9800000  9500000 28100000  5600000 24000000 25300000 12000000 22000000
 11900000  3400000  6200000 27200000  7700000  5100000 18100000 24900000
  2300000 13400000 27800000 19100000 20500000 25400000 24700000  7600000
 23000000 19700000 24500000 10600000 30500000 18400000 18200000 18900000
 28900000  7500000 12300000 29100000 10100000 12400000  5000000  1500000
 18600000 18300000 16700000  8400000  6500000 14800000 33500000 29400000
  8900000 31200000 21200000  8600000  8200000  3800000 28300000  8000000
 37600000 21100000 20700000  6400000  2000000  1100000 25000000 10800000
   900000 12900000  4500000 23600000  9700000 35900000  6800000 22100000
 23400000 23200000 15800000 32900000  3200000 18700000 19500000   600000
   800000  2600000  1200000 20800000 22600000  3600000 13900000  5500000
  6700000  8500000   700000 17400000 32100000 11100000 19300000 28800000
 20600000 35000000 33300000  1300000  9600000 15100000  5300000 22300000
 15900000 12800000 35200000 17500000 10500000  4100000 28200000 14300000
 13300000 17900000  9900000 23100000  3100000 10900000 30400000 23300000
 19800000  2900000 13200000 27100000  6000000 16400000 15600000 30100000
 20900000 15400000  3300000 32700000 15200000  7800000 17000000 11300000
 10400000 11000000  1700000 27000000  3500000 32400000 34600000 15500000
 22500000 16200000 29300000  9100000 30900000  4700000  2400000 35400000
 20000000 38800000  8100000 19600000 34300000 22200000 14400000 16800000
 27900000 20400000  4900000  4000000 19900000  1800000 11800000 25500000
  9300000 20200000  1000000 38200000  6600000 33200000 24400000 14100000
 28700000 23500000 17100000  5700000  7000000 16900000 21000000 12600000
 28000000 17200000 24100000 26300000 38400000 32200000 26900000 21900000
 12500000 29800000 17600000 29000000 35300000 14500000 22400000  8700000
  6300000  5200000  2700000 25900000 21500000 15700000  7300000  7200000
 18800000  2100000  4600000  5400000 15300000  2800000 36400000 27500000
 11400000 18500000 26700000  5800000 21400000  2500000 14700000 17300000
 28600000 31900000  8800000 30300000 24800000 19200000 39500000  9000000
 19000000 23900000 31800000 10000000 14200000 22800000 32300000   500000
  6100000  5900000 24600000 16000000 12700000 13100000  6900000 27700000
 29200000 13800000 18000000 27400000 17800000 20100000 32600000  4800000
 11500000 22900000  1900000   300000 16600000 28500000 25100000 25800000
 21800000 30000000 10200000 31700000 26000000  8300000 35500000 33900000
 17700000  4200000  9200000 20300000   400000 34700000 26500000 16500000
 14900000 37000000 27300000 26200000 25600000 16300000 24300000 21600000
 11700000 34200000 34500000 13000000 23700000 30800000  3900000 13600000
 38700000 26600000 37900000 21700000 29600000 23800000 34000000 25200000
 35700000 26100000 16100000 13700000 38000000 37500000  7900000 34400000
 37300000 21300000 28400000 35800000 38500000 34900000 33600000 36800000
 31400000  3000000  4400000 26400000 37800000  7100000 34100000 30200000
 32000000 31300000 12100000 36700000 30600000  3700000 31600000 29500000
 31000000 34800000 36500000 36000000 36300000 31100000 26800000 35100000
 32800000 33100000 32500000 33400000 27600000 33700000 36600000 33800000
 37700000 36100000]

Unique values in column ' loan_term':
[12  8 20 10  4  2 18 16 14  6]

Unique values in column ' cibil_score':
[778 417 506 467 382 319 678 782 388 547 538 311 679 469 794 663 780 736
 652 315 530 551 324 514 696 662 336 850 313 363 436 830 612 691 636 348
 352 712 822 540 342 787 331 677 634 502 435 689 657 590 818 431 841 421
 797 478 669 365 586 784 364 715 693 777 312 340 386 418 735 494 671 697
 801 576 639 470 826 613 713 439 387 402 837 641 489 844 452 366 300 861
 562 463 702 618 633 764 591 719 317 302 879 437 456 647 379 717 545 570
 865 821 859 395 429 565 357 465 479 425 786 564 501 727 894 829 802 543
 772 572 709 481 306 415 548 701 890 704 318 761 524 681 737 638 656 341
 371 886 748 376 873 309 869 534 566 742 824 575 766 888 622 458 327 682
 583 816 455 355 389 870 827 768 707 665 420 471 819 809 744 484 673 695
 473 491 733 434 774 503 598 796 632 770 667 585 851 378 807 831 674 725
 600 536 477 560 539 852 853 729 546 789 325 716 523 345 649 666 813 599
 513 483 308 651 433 403 405 516 468 672 549 450 320 476 573 877 531 474
 499 726 485 708 404 512 441 555 466 427 593 731 451 628 424 381 449 445
 781 721 563 419 372 885 596 349 685 377 620 611 767 592 900 814 755 584
 380 655 833 658 648 730 621 610 339 650 367 847 360 880 608 760 385 710
 711 855 771 338 769 629 699 391 891 775 897 839 868 353 792 635 457 350
 874 411 482 396 303 728 698 490 504 790 860 492 834 443 329 739 867 307
 375 601 756 838 442 808 597 373 552 607 823 328 580 559 587 817 765 383
 843 783 409 625 645 887 791 686 722 407 895 453 627 889 684 578 369 557
 519 741 508 493 664 362 703 758 828 528 623 579 846 589 845 401 522 588
 863 798 668 881 406 799 743 734 812 459 448 517 426 785 472 683 803 361
 464 747 335 394 848 788 509 899 595 322 631 330 567 323 670 609 354 746
 857 556 393 688 384 414 815 854 849 346 856 440 616 461 866 820 544 561
 614 351 399 344 301 763 624 644 642 423 724 706 811 326 488 475 337 511
 810 428 356 594 480 757 321 368 806 832 571 527 333 532 754 835 553 400
 558 515 740 447 745 495 660 883 795 762 462 779 752 305 310 525 661 884
 800 568 842 653 617 460 804 358 836 554 840 430 347 550 878 603 444 875
 343 714 529 446 705 898 487 615 676 605 569 410 753 454 619 858 392 637
 359 723 304 690 862 496 659 542 749 694 574 692 521 541 640 630 535 422
 606 370 700 751 896 577 537 316 412 793 390 397 876 498 872 871 497 759
 413 602 720 505 582 416 510 500 626 654 892 680 750 314 520 776 825 646
 518 805 332 882 604 507 408 374 687 533 581 675 773 718 432 526 398 643
 893 438 486 732 334 738 864]

Unique values in column ' residential_assets_value':
[ 2400000  2700000  7100000 18200000 12400000  6800000 22500000 13200000
  1300000  3200000  8100000 15300000  6400000 10800000  1900000  5700000
  2900000  1000000 10300000  9500000  3800000 13100000   900000  7900000
 11500000  4500000  2300000 21800000 20200000  3600000   700000  9700000
  3400000  7000000   100000  8600000 22300000   200000  2200000 13000000
  5400000   800000   500000  8700000 15400000  7400000  1200000  2100000
 19300000 18500000  -100000 23800000  4700000 24400000  1600000  7600000
  6100000  5500000  4000000 18400000  3900000  6500000   600000 14300000
 11600000 17600000 25500000  9400000  5300000 17100000 20400000  5100000
 24100000 19200000  9100000 14700000 25900000   300000 11400000  7800000
 19600000  5600000  7300000 19500000 16100000  1500000 12700000 26800000
 12200000   400000 15100000 11700000  4400000  6600000  1100000  2600000
 14600000 13600000 15900000        0  5800000  3700000 24200000  4900000
  2500000  7700000 21900000  3300000  9800000 12100000  3000000 16800000
 12600000  1700000  8800000 13700000 10000000  6300000 15200000 22000000
  8300000 11300000 14400000 11100000  3100000 15500000  3500000 13800000
  9000000 14100000 14800000  8500000 18700000  2800000  9200000 20000000
  4100000 22800000 16500000  6000000 23200000  5000000 25600000 24500000
 13400000 14000000 16000000 18100000  8000000  9900000 17200000  1800000
  1400000 10400000  4200000  6900000 16600000  9600000 17400000  8400000
 11900000 10500000  5900000  7200000 14200000 22900000  4300000 16900000
  6200000 12500000 15700000  9300000 18000000  8200000 10700000  4800000
 10200000 21500000 12900000  4600000 15600000 10600000  5200000 21700000
 11000000 23300000 20800000 23000000 11800000 21100000 10900000  2000000
 15800000 23400000 13500000 23900000 17300000 18300000 19400000 22100000
 12000000 24000000  6700000 13900000 20600000 25400000  7500000 10100000
 17700000 28300000 11200000 18800000 14500000 24900000 26300000 13300000
 22400000 27600000 21400000 28700000 25300000 25800000 18600000 19100000
 22200000 28200000 19700000 25200000 24700000 16700000 17000000 16300000
 15000000 21300000 12800000 20300000 12300000 19900000 16200000 19000000
 16400000  8900000 22700000 25700000 21200000 27000000 21600000 17800000
 28500000 14900000 17900000 28400000 23700000 20500000 24600000 20100000
 22600000 20900000 21000000 26600000 26200000 19800000 17500000 28000000
 24800000 26900000 26100000 20700000 29100000 18900000 25100000 23500000
 24300000 27500000 25000000 23100000 27400000 27300000]

Unique values in column ' commercial_assets_value':
[17600000  2200000  4500000  3300000  8200000  8300000 14800000  5700000
   800000  1400000  4700000  5800000  9600000 16600000  1200000  3900000
   100000  2800000        0  3500000  1600000 11300000  1700000   600000
  8700000  3100000 10600000  4200000 11900000 12400000  5200000  7400000
   200000   700000   300000  1300000 11200000 12100000  1500000  6300000
  6900000  9100000  8600000 10500000  1800000  9300000  5600000 10300000
  4900000 16300000  1900000  6100000  9700000 11700000  9400000  3800000
  2500000  7800000  8900000   500000 11400000 13600000  2600000  4300000
  3200000  1100000   400000  4800000  8500000 15200000  3600000 16500000
  2700000  7600000  6000000 12200000  2000000  1000000  6200000  8000000
  5900000  4100000  6500000 10000000 16700000   900000  2100000  9500000
  5500000  4400000 18700000  5100000 11100000 12600000  5000000  6800000
  2400000  7500000  2900000 10900000 11000000 11600000  2300000  3400000
 11500000  8100000  5300000  6700000 10200000 10800000  4000000  4600000
  7000000  6600000 17500000 16200000 12300000 12800000 13200000 16400000
 19000000 16100000  8800000  3700000  5400000  8400000 12000000 15000000
  9200000 17200000 11800000 14900000 13800000  7900000 10400000 18500000
 12500000 13400000  9900000 12700000 15400000 14700000 15600000 14000000
 16000000 13000000 14300000  9800000 18800000 13900000  7200000  7100000
 15100000 15500000 13300000  3000000 13700000  7300000 17800000  6400000
 17900000 12900000 14600000 10100000 18300000  9000000 14500000 14200000
 17300000 13100000 10700000 16800000 18900000 18400000 18200000 14100000
 14400000  7700000 17000000 15900000 15300000 19400000 16900000 13500000
 17400000 15700000 15800000 17700000]

Unique values in column ' luxury_assets_value':
[22700000  8800000 33300000 23300000 29400000 13700000 29200000 11800000
  2800000  3300000  9500000 20400000 14600000 20900000  5900000 16400000
  1300000  6700000  6200000 23500000 18000000 22200000 19500000  1100000
 10000000  6600000 25300000  5400000 27500000 33700000 25500000 21700000
  2200000 19900000 19000000  6000000  5300000 16700000  5600000 31000000
  3900000  1800000 16200000 21400000  8700000 17700000 18500000 37700000
 20500000 21800000  9300000 31900000 19400000 16300000 34600000 17500000
 18600000 25900000 26500000 27400000 10500000 13100000 14900000 24100000
  4900000  1900000 11900000 21500000 12600000  4800000 12900000 35400000
 25200000  2400000 12300000 26600000 10300000 11000000  3800000 27900000
 23400000 12500000 22400000  3200000   700000 18200000 23200000 36400000
 13800000  1200000   500000 11400000  4100000 23800000 20800000  9900000
 11700000   900000 17900000 19300000 33400000  7700000 22600000  1500000
 23600000  2700000  2000000   800000 15500000 33900000 25700000  4400000
 13900000  8600000  7500000  7400000 12800000 24500000  2100000  5100000
  7200000 18800000 18100000  2900000 36100000 14000000  8400000 27800000
  8000000 10400000 17800000  4300000 27000000 16000000  5000000 23100000
 18700000  9700000 17400000  6500000 33800000  5700000 20000000  8200000
 14300000 26300000 26900000 26400000 27700000 24000000 22500000 28000000
 31800000 12200000 38200000 38600000 19600000 21900000  3500000 27200000
  3700000 15000000 34700000 23700000  8500000 10600000 16100000 21200000
 13600000  7000000 18400000  7100000 14700000  9600000 11200000 24300000
 20300000  6400000 23000000 25000000  6300000 22800000 31600000 29700000
 29100000 30800000 13000000 26000000 14500000 16800000 29500000 28200000
 19100000 26700000  5200000  4600000  7900000 16900000  9800000 15600000
 30500000 30200000 12400000  3000000 20100000  8900000 19800000 35500000
 28500000 25400000 16500000 17200000  4700000 28800000  3600000   400000
 14200000 14800000 14100000 22900000 26100000 36500000 28600000 34500000
 30900000  6900000  2300000 25100000 28900000 14400000 29900000  7600000
 37000000 15800000  1400000 33500000 13500000   300000 13200000  1600000
  8100000  5800000 10900000 11300000 10200000 13300000 34900000 17300000
 22000000 32100000 20700000 26800000 27100000 10100000 21000000 19700000
   600000  7300000 32000000 22100000  2600000  9100000 31100000 32700000
 32800000 24900000  5500000 32600000  3400000  9000000 12700000  6800000
 17100000 20200000 10800000 34100000 26200000 29000000 11600000 31300000
 28400000 11100000 12000000 12100000 17000000 15100000 28300000 16600000
 15300000 18900000 23900000 24400000 17600000 11500000 21100000 30000000
 29600000 15200000 27600000 20600000 30400000  9400000  7800000 18300000
  4200000  8300000 30100000 25800000  1700000 21600000 29300000 35700000
  4500000 30300000 10700000 24800000 31500000 24700000 19200000 13400000
 35100000 35600000 15900000 33000000 31700000  9200000  6100000 15400000
  2500000 24600000 35800000 22300000 34300000 36600000  3100000 28700000
 36900000 28100000 32500000 38100000 39200000 15700000 37800000 27300000
 31200000 39100000 21300000 24200000 37200000 37900000 25600000 33600000
 30600000 32900000 37400000 34000000  1000000 37600000 35900000 32300000
 32400000 31400000 30700000 34400000  4000000 32200000 29800000 33200000
 34800000 36000000 36200000 36800000 35300000 38000000 33100000 37300000
 34200000 35000000 36700000]

Unique values in column ' bank_asset_value':
[ 8000000  3300000 12800000  7900000  5000000  5100000  4300000  6000000
   600000  1600000  3100000  6400000  1900000  4400000   700000  5900000
  6100000  5400000  8500000   300000  2600000  7200000  2500000  9700000
  9300000  1000000  5800000   900000  1400000  7100000  2900000  9000000
  5200000   800000 10900000  4900000  6500000  8200000 11700000 10500000
 11300000  3400000  6200000  8700000  4100000  4800000 11400000  4700000
  2800000 11900000  5500000  2400000  4200000  7600000  5600000  2000000
  1100000  6300000 11100000  8600000  6800000  3600000 10200000 12700000
  2100000  1300000   400000  7000000  7300000   100000   200000 11600000
  1800000  9800000  8100000  7500000 13400000  9600000  3800000  8400000
  3200000  1200000  4600000  8300000  4500000  3500000  2300000  7400000
  1700000  9500000  3000000  2200000  9200000  4000000 11200000   500000
  9400000 14400000 10000000  6600000 12500000  1500000  9100000  7700000
  7800000 10300000  9900000  8800000  5700000 10400000 11800000  5300000
 12400000  2700000 11500000  3900000        0 10800000  6700000 12900000
 12300000  6900000 12200000 13500000  8900000  3700000 12100000 13600000
 13100000 10600000 13900000 12000000 13000000 10100000 10700000 11000000
 13200000 14700000 14000000 13300000 13800000 14600000 14300000 14200000
 13700000 14100000]

Unique values in column ' loan_status':
[' Approved', ' Rejected']
Categories (2, object): [' Approved', ' Rejected']

In [11]:
category_columns = df.select_dtypes(include=["category"])
In [13]:
category_columns
Out[13]:
education self_employed loan_status
0 Graduate No Approved
1 Not Graduate Yes Rejected
2 Graduate No Rejected
3 Graduate No Rejected
4 Not Graduate Yes Rejected
... ... ... ...
4264 Graduate Yes Rejected
4265 Not Graduate Yes Approved
4266 Not Graduate No Rejected
4267 Not Graduate No Approved
4268 Graduate No Approved

4269 rows × 3 columns

In [15]:
category_columns.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4269 entries, 0 to 4268
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype   
---  ------          --------------  -----   
 0    education      4269 non-null   category
 1    self_employed  4269 non-null   category
 2    loan_status    4269 non-null   category
dtypes: category(3)
memory usage: 13.0 KB
In [17]:
print(category_columns.columns)
Index([' education', ' self_employed', ' loan_status'], dtype='object')
In [19]:
# Boşluk karakterini kaldırarak sütun adlarını yeniden adlandırma
category_columns = category_columns.rename(columns=lambda x: x.strip())
# Boşluk karakterini kaldırarak sütun adlarını yeniden adlandırma
df = df.rename(columns=lambda x: x.strip())
In [21]:
category_columns.education.unique()
Out[21]:
[' Graduate', ' Not Graduate']
Categories (2, object): [' Graduate', ' Not Graduate']
In [23]:
category_columns["education"].value_counts().count()
Out[23]:
2
In [25]:
category_columns["education"].value_counts()
Out[25]:
education
Graduate        2144
Not Graduate    2125
Name: count, dtype: int64
In [27]:
category_columns["education"].value_counts().plot.barh();
No description has been provided for this image
In [22]:
category_columns.self_employed.unique()
Out[22]:
[' No', ' Yes']
Categories (2, object): [' No', ' Yes']
In [23]:
category_columns["self_employed"].value_counts().count()
Out[23]:
2
In [24]:
category_columns["self_employed"].value_counts()
Out[24]:
self_employed
Yes    2150
No     2119
Name: count, dtype: int64
In [25]:
category_columns["self_employed"].value_counts().plot.barh();
No description has been provided for this image
In [26]:
category_columns.loan_status.unique()
Out[26]:
[' Approved', ' Rejected']
Categories (2, object): [' Approved', ' Rejected']
In [27]:
category_columns["loan_status"].value_counts().count()
Out[27]:
2
In [28]:
category_columns["loan_status"].value_counts()
Out[28]:
loan_status
Approved    2656
Rejected    1613
Name: count, dtype: int64
In [29]:
category_columns["loan_status"].value_counts().plot.barh();
No description has been provided for this image
In [30]:
num_columns = df.select_dtypes(include=["int64"])
In [31]:
num_columns.head()
Out[31]:
loan_id no_of_dependents income_annum loan_amount loan_term cibil_score residential_assets_value commercial_assets_value luxury_assets_value bank_asset_value
0 1 2 9600000 29900000 12 778 2400000 17600000 22700000 8000000
1 2 0 4100000 12200000 8 417 2700000 2200000 8800000 3300000
2 3 3 9100000 29700000 20 506 7100000 4500000 33300000 12800000
3 4 3 8200000 30700000 8 467 18200000 3300000 23300000 7900000
4 5 5 9800000 24200000 20 382 12400000 8200000 29400000 5000000
In [32]:
num_columns.describe().T
Out[32]:
count mean std min 25% 50% 75% max
loan_id 4269.0 2.135000e+03 1.232498e+03 1.0 1068.0 2135.0 3202.0 4269.0
no_of_dependents 4269.0 2.498712e+00 1.695910e+00 0.0 1.0 3.0 4.0 5.0
income_annum 4269.0 5.059124e+06 2.806840e+06 200000.0 2700000.0 5100000.0 7500000.0 9900000.0
loan_amount 4269.0 1.513345e+07 9.043363e+06 300000.0 7700000.0 14500000.0 21500000.0 39500000.0
loan_term 4269.0 1.090045e+01 5.709187e+00 2.0 6.0 10.0 16.0 20.0
cibil_score 4269.0 5.999361e+02 1.724304e+02 300.0 453.0 600.0 748.0 900.0
residential_assets_value 4269.0 7.472617e+06 6.503637e+06 -100000.0 2200000.0 5600000.0 11300000.0 29100000.0
commercial_assets_value 4269.0 4.973155e+06 4.388966e+06 0.0 1300000.0 3700000.0 7600000.0 19400000.0
luxury_assets_value 4269.0 1.512631e+07 9.103754e+06 300000.0 7500000.0 14600000.0 21700000.0 39200000.0
bank_asset_value 4269.0 4.976692e+06 3.250185e+06 0.0 2300000.0 4600000.0 7100000.0 14700000.0
In [33]:
print("no_of_dependents")
print("---------------------------------")
print("Ortalama: " + str(num_columns["no_of_dependents"].mean()))
print("Dolu Gözlem Sayısı: " + str(num_columns["no_of_dependents"].count())) 
print("Maksimum Değer: " + str(num_columns["no_of_dependents"].max()))
print("Minimum Değer: " + str(num_columns["no_of_dependents"].min()))
print("Medyan: " + str(num_columns["no_of_dependents"].median()))
print("Standart Sapma: " + str(num_columns["no_of_dependents"].std()))
print("---------------------------------\n\n")

print("income_annum")
print("---------------------------------")
print("Ortalama: " + str(num_columns["income_annum"].mean()))
print("Dolu Gözlem Sayısı: " + str(num_columns["income_annum"].count())) 
print("Maksimum Değer: " + str(num_columns["income_annum"].max()))
print("Minimum Değer: " + str(num_columns["income_annum"].min()))
print("Medyan: " + str(num_columns["income_annum"].median()))
print("Standart Sapma: " + str(num_columns["income_annum"].std()))
print("---------------------------------\n\n")

print("loan_amount")
print("---------------------------------")
print("Ortalama: " + str(num_columns["loan_amount"].mean()))
print("Dolu Gözlem Sayısı: " + str(num_columns["loan_amount"].count())) 
print("Maksimum Değer: " + str(num_columns["loan_amount"].max()))
print("Minimum Değer: " + str(num_columns["loan_amount"].min()))
print("Medyan: " + str(num_columns["loan_amount"].median()))
print("Standart Sapma: " + str(num_columns["loan_amount"].std()))
print("---------------------------------\n\n")

print("loan_term")
print("---------------------------------")
print("Ortalama: " + str(num_columns["loan_term"].mean()))
print("Dolu Gözlem Sayısı: " + str(num_columns["loan_term"].count())) 
print("Maksimum Değer: " + str(num_columns["loan_term"].max()))
print("Minimum Değer: " + str(num_columns["loan_term"].min()))
print("Medyan: " + str(num_columns["loan_term"].median()))
print("Standart Sapma: " + str(num_columns["loan_term"].std()))
print("---------------------------------\n\n")

print("cibil_score")
print("---------------------------------")
print("Ortalama: " + str(num_columns["cibil_score"].mean()))
print("Dolu Gözlem Sayısı: " + str(num_columns["cibil_score"].count())) 
print("Maksimum Değer: " + str(num_columns["cibil_score"].max()))
print("Minimum Değer: " + str(num_columns["cibil_score"].min()))
print("Medyan: " + str(num_columns["cibil_score"].median()))
print("Standart Sapma: " + str(num_columns["cibil_score"].std()))
print("---------------------------------\n\n")

print("residential_assets_value")
print("---------------------------------")
print("Ortalama: " + str(num_columns["residential_assets_value"].mean()))
print("Dolu Gözlem Sayısı: " + str(num_columns["residential_assets_value"].count())) 
print("Maksimum Değer: " + str(num_columns["residential_assets_value"].max()))
print("Minimum Değer: " + str(num_columns["residential_assets_value"].min()))
print("Medyan: " + str(num_columns["residential_assets_value"].median()))
print("Standart Sapma: " + str(num_columns["residential_assets_value"].std()))
print("---------------------------------\n\n")

print("commercial_assets_value")
print("---------------------------------")
print("Ortalama: " + str(num_columns["commercial_assets_value"].mean()))
print("Dolu Gözlem Sayısı: " + str(num_columns["commercial_assets_value"].count())) 
print("Maksimum Değer: " + str(num_columns["commercial_assets_value"].max()))
print("Minimum Değer: " + str(num_columns["commercial_assets_value"].min()))
print("Medyan: " + str(num_columns["commercial_assets_value"].median()))
print("Standart Sapma: " + str(num_columns["commercial_assets_value"].std()))
print("---------------------------------\n\n")

print("luxury_assets_value")
print("---------------------------------")
print("Ortalama: " + str(num_columns["luxury_assets_value"].mean()))
print("Dolu Gözlem Sayısı: " + str(num_columns["luxury_assets_value"].count())) 
print("Maksimum Değer: " + str(num_columns["luxury_assets_value"].max()))
print("Minimum Değer: " + str(num_columns["luxury_assets_value"].min()))
print("Medyan: " + str(num_columns["luxury_assets_value"].median()))
print("Standart Sapma: " + str(num_columns["luxury_assets_value"].std()))
print("---------------------------------\n\n")

print("bank_asset_value")
print("---------------------------------")
print("Ortalama: " + str(num_columns["bank_asset_value"].mean()))
print("Dolu Gözlem Sayısı: " + str(num_columns["bank_asset_value"].count())) 
print("Maksimum Değer: " + str(num_columns["bank_asset_value"].max()))
print("Minimum Değer: " + str(num_columns["bank_asset_value"].min()))
print("Medyan: " + str(num_columns["bank_asset_value"].median()))
print("Standart Sapma: " + str(num_columns["bank_asset_value"].std()))
print("---------------------------------\n\n")
no_of_dependents
---------------------------------
Ortalama: 2.4987116420707425
Dolu Gözlem Sayısı: 4269
Maksimum Değer: 5
Minimum Değer: 0
Medyan: 3.0
Standart Sapma: 1.695910160711101
---------------------------------


income_annum
---------------------------------
Ortalama: 5059123.9166081045
Dolu Gözlem Sayısı: 4269
Maksimum Değer: 9900000
Minimum Değer: 200000
Medyan: 5100000.0
Standart Sapma: 2806839.831818462
---------------------------------


loan_amount
---------------------------------
Ortalama: 15133450.456781447
Dolu Gözlem Sayısı: 4269
Maksimum Değer: 39500000
Minimum Değer: 300000
Medyan: 14500000.0
Standart Sapma: 9043362.984842854
---------------------------------


loan_term
---------------------------------
Ortalama: 10.900445069102835
Dolu Gözlem Sayısı: 4269
Maksimum Değer: 20
Minimum Değer: 2
Medyan: 10.0
Standart Sapma: 5.7091872792452
---------------------------------


cibil_score
---------------------------------
Ortalama: 599.9360505973295
Dolu Gözlem Sayısı: 4269
Maksimum Değer: 900
Minimum Değer: 300
Medyan: 600.0
Standart Sapma: 172.43040073575904
---------------------------------


residential_assets_value
---------------------------------
Ortalama: 7472616.537830873
Dolu Gözlem Sayısı: 4269
Maksimum Değer: 29100000
Minimum Değer: -100000
Medyan: 5600000.0
Standart Sapma: 6503636.587664101
---------------------------------


commercial_assets_value
---------------------------------
Ortalama: 4973155.3056922
Dolu Gözlem Sayısı: 4269
Maksimum Değer: 19400000
Minimum Değer: 0
Medyan: 3700000.0
Standart Sapma: 4388966.089638461
---------------------------------


luxury_assets_value
---------------------------------
Ortalama: 15126305.926446475
Dolu Gözlem Sayısı: 4269
Maksimum Değer: 39200000
Minimum Değer: 300000
Medyan: 14600000.0
Standart Sapma: 9103753.665256497
---------------------------------


bank_asset_value
---------------------------------
Ortalama: 4976692.433825252
Dolu Gözlem Sayısı: 4269
Maksimum Değer: 14700000
Minimum Değer: 0
Medyan: 4600000.0
Standart Sapma: 3250185.3056957023
---------------------------------


Veri setinde sütunların başlarında boşluk atanıyor otomatik olarak bu da çoğu işlemde sorun yaratmakta bu durumu önlemek adına aşağıdaki kod satırı ile veri setini bu konuda düzenliyoruz.¶
In [25]:
# Boşluk karakterini kaldırarak sütun adlarını yeniden adlandırma
df = df.rename(columns=lambda x: x.strip())

plot.barh¶

Eğitim Seviyesine Göre Frekanslar¶

In [35]:
plt.figure(figsize=(10, 6))
df['education'].value_counts().plot.barh(color='skyblue')
plt.title('Eğitim Seviyesine Göre Frekanslar')
plt.xlabel('Frekans')
plt.ylabel('Eğitim Seviyesi')
plt.show()
No description has been provided for this image

Kredi Durumuna Göre Meslek Dağılımı¶

In [36]:
plt.figure(figsize=(10, 6))
df[df['loan_status'] == ' Approved']['self_employed'].value_counts().plot.barh(color='green', label='Approved')
df[df['loan_status'] == ' Rejected']['self_employed'].value_counts().plot.barh(color='red', label='Rejected', alpha=0.5)
plt.title('Kredi Durumuna Göre Meslek Dağılımı')
plt.xlabel('Frekans')
plt.ylabel('Meslek Durumu')
plt.legend()
plt.show()
No description has been provided for this image

Eğitim seviyesine göre kredi onay oranlarının farkı¶

In [37]:
# Eğitim seviyesine göre kredi onay oranlarının farkını inceleme
education_loan_status = df.groupby('education')['loan_status'].value_counts(normalize=True).unstack()

# Horizontal bar plot oluşturma
education_loan_status.plot.barh(stacked=True)
plt.title('Eğitim Seviyesine Göre Kredi Onay Oranları')
plt.xlabel('Onay Oranı')
plt.ylabel('Eğitim Seviyesi')
plt.legend(title='Kredi Durumu', loc='upper right')
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_19200\1407193328.py:2: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  education_loan_status = df.groupby('education')['loan_status'].value_counts(normalize=True).unstack()
No description has been provided for this image

barplot¶

Gelir Seviyesine Göre Kredi Miktarları¶

In [38]:
# Gelir seviyesini belirli aralıklara bölmek
df['income_group'] = pd.cut(df['income_annum'], bins=20)

# Gelir seviyesi gruplarına göre kredi miktarlarının ortalamasını hesaplamak
grouped_df = df.groupby('income_group')['loan_amount'].mean().reset_index()

# Gelir seviyesi gruplarına göre kredi miktarlarını görselleştirmek
sns.barplot(data=grouped_df, x='income_group', y='loan_amount')
plt.title('Gelir Seviyesine Göre Kredi Miktarları')
plt.xlabel('Gelir Grupları')
plt.ylabel('Ortalama Kredi Miktarı')
plt.xticks(rotation=90)
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_19200\1707770033.py:5: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_df = df.groupby('income_group')['loan_amount'].mean().reset_index()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_vals = vals.groupby(grouper)
No description has been provided for this image

Eğitim Seviyesine Göre Ortalama Kredi Miktarı¶

In [39]:
plt.figure(figsize=(10, 6))
sns.barplot(data=df, x='education', y='loan_amount', estimator=np.mean)
plt.title('Eğitim Seviyesine Göre Ortalama Kredi Miktarı')
plt.xlabel('Eğitim Seviyesi')
plt.ylabel('Ortalama Kredi Miktarı')
plt.xticks(rotation=45)
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_vals = vals.groupby(grouper)
No description has been provided for this image

Kredi Durumu ve Eğitim Seviyesine Göre Kredi Miktarı¶

In [40]:
plt.figure(figsize=(10, 6))
sns.barplot(data=df, x='loan_status', y='loan_amount', hue='education', ci=None)
plt.title('Kredi Durumu ve Eğitim Seviyesine Göre Kredi Miktarı')
plt.xlabel('Kredi Durumu')
plt.ylabel('Kredi Miktarı')
plt.xticks(rotation=45)
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_19200\1795589591.py:2: FutureWarning: 

The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(data=df, x='loan_status', y='loan_amount', hue='education', ci=None)
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_vals = vals.groupby(grouper)
No description has been provided for this image

Kredi Durumu ve Eğitim Seviyesine Göre Ortalama Gelir¶

In [41]:
plt.figure(figsize=(10, 6))
sns.barplot(data=df, x='loan_status', y='income_annum', hue='education', estimator=np.mean)
plt.title('Kredi Durumu ve Eğitim Seviyesine Göre Ortalama Gelir')
plt.xlabel('Kredi Durumu')
plt.ylabel('Ortalama Gelir')
plt.xticks(rotation=45)
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_vals = vals.groupby(grouper)
No description has been provided for this image

catplot¶

Kredi onay durumu, gelir seviyesi ve eğitim seviyesi arasındaki ilişki¶

In [42]:
# Kredi onay durumu, gelir seviyesi ve eğitim seviyesi arasındaki ilişkiyi görselleştirme
sns.catplot(data=df, x='loan_status', y='income_annum', hue='education', kind='box')
plt.title('Kredi Onay Durumu ve Gelir Seviyesi ile Eğitim Seviyesinin İlişkisi')
plt.xlabel('Kredi Durumu')
plt.ylabel('Gelir (Yıllık)')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_vals = vals.groupby(grouper)
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_vals = vals.groupby(grouper)
No description has been provided for this image

Eğitim Seviyesine Göre Kredi Durumu Dağılımı¶

In [43]:
sns.catplot(data=df, x='education', hue='loan_status', kind='count', height=6, aspect=1.5)
plt.title('Eğitim Seviyesine Göre Kredi Durumu Dağılımı')
plt.xlabel('Eğitim Seviyesi')
plt.ylabel('Gözlem Sayısı')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_vals = vals.groupby(grouper)
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_vals = vals.groupby(grouper)
No description has been provided for this image

Eğitim Seviyesine Göre Kredi Miktarı Dağılımı¶

In [44]:
sns.catplot(data=df, x='education', y='loan_amount', kind='box', height=6, aspect=1.5)
plt.title('Eğitim Seviyesine Göre Kredi Miktarı Dağılımı')
plt.xlabel('Eğitim Seviyesi')
plt.ylabel('Kredi Miktarı')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_vals = vals.groupby(grouper)
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_vals = vals.groupby(grouper)
No description has been provided for this image

Kredi Durumu ve Eğitim Seviyesine Göre Kredi Miktarı Dağılımı¶

In [45]:
sns.catplot(data=df, x='loan_status', y='loan_amount', kind='violin', hue='education', height=6, aspect=1.5)
plt.title('Kredi Durumu ve Eğitim Seviyesine Göre Kredi Miktarı Dağılımı')
plt.xlabel('Kredi Durumu')
plt.ylabel('Kredi Miktarı')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_vals = vals.groupby(grouper)
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_vals = vals.groupby(grouper)
No description has been provided for this image

groupby¶

In [46]:
# Kredi onay durumuna göre ortalama kredi miktarını inceleme
loan_status_loan_amount = df.groupby('loan_status')['loan_amount'].mean().reset_index()

# Bar plot oluşturma
plt.bar(loan_status_loan_amount['loan_status'], loan_status_loan_amount['loan_amount'])
plt.title('Kredi Onay Durumuna Göre Ortalama Kredi Miktarı')
plt.xlabel('Kredi Durumu')
plt.ylabel('Ortalama Kredi Miktarı')
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_19200\2061942401.py:2: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  loan_status_loan_amount = df.groupby('loan_status')['loan_amount'].mean().reset_index()
No description has been provided for this image

Eğitim Seviyesine Göre Ortalama Kredi Miktarı¶

In [47]:
df.groupby('education')['loan_amount'].mean().plot.bar(color='skyblue')
plt.title('Eğitim Seviyesine Göre Ortalama Kredi Miktarı')
plt.xlabel('Eğitim Seviyesi')
plt.ylabel('Ortalama Kredi Miktarı')
plt.xticks(rotation=45)
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_19200\3426761793.py:1: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  df.groupby('education')['loan_amount'].mean().plot.bar(color='skyblue')
No description has been provided for this image

Kredi Durumuna Göre Ortalama Gelir¶

In [48]:
df.groupby('loan_status')['income_annum'].mean().plot.bar(color='lightgreen')
plt.title('Kredi Durumuna Göre Ortalama Gelir')
plt.xlabel('Kredi Durumu')
plt.ylabel('Ortalama Gelir')
plt.xticks(rotation=0)
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_19200\1141899142.py:1: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  df.groupby('loan_status')['income_annum'].mean().plot.bar(color='lightgreen')
No description has been provided for this image

Eğitim Seviyesine ve Kredi Durumuna Göre Ortalama Kredi Miktarı¶

In [49]:
df.groupby(['education', 'loan_status'])['loan_amount'].mean().unstack().plot(kind='bar', stacked=True)
plt.title('Eğitim Seviyesine ve Kredi Durumuna Göre Ortalama Kredi Miktarı')
plt.xlabel('Eğitim Seviyesi')
plt.ylabel('Ortalama Kredi Miktarı')
plt.xticks(rotation=45)
plt.legend(title='Kredi Durumu')
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_19200\2422632664.py:1: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  df.groupby(['education', 'loan_status'])['loan_amount'].mean().unstack().plot(kind='bar', stacked=True)
No description has been provided for this image

histplot¶

In [50]:
# Cibil skoru dağılımını görselleştirme
sns.histplot(data=df, x='cibil_score', kde=True)
plt.title('Cibil Skoru Dağılımı')
plt.xlabel('Cibil Skoru')
plt.ylabel('Frekans')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
No description has been provided for this image

Gelir Dağılım¶

In [51]:
plt.figure(figsize=(10, 6))
sns.histplot(data=df, x='income_annum', bins=20, kde=True, color='skyblue')
plt.title('Gelir Dağılımı')
plt.xlabel('Gelir (Yıllık)')
plt.ylabel('Frekans')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
No description has been provided for this image

Kredi Miktarı Dağılımı¶

In [52]:
plt.figure(figsize=(10, 6))
sns.histplot(data=df, x='loan_amount', bins=30, kde=True, color='orange')
plt.title('Kredi Miktarı Dağılımı')
plt.xlabel('Kredi Miktarı')
plt.ylabel('Frekans')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
No description has been provided for this image

Cibil Skoru Dağılımı¶

In [53]:
plt.figure(figsize=(10, 6))
sns.histplot(data=df, x='cibil_score', bins=20, kde=True, color='green')
plt.title('Cibil Skoru Dağılımı')
plt.xlabel('Cibil Skoru')
plt.ylabel('Frekans')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
No description has been provided for this image

kdeplot¶

In [54]:
# Kredi miktarı ve gelir seviyesi arasındaki yoğunluğu görselleştirme
sns.kdeplot(data=df, x='loan_amount', y='income_annum', cmap='Blues', fill=True)
plt.title('Kredi Miktarı ve Gelir Seviyesi Yoğunluğu')
plt.xlabel('Kredi Miktarı')
plt.ylabel('Gelir (Yıllık)')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
No description has been provided for this image

Gelir Dağılımı (KDE)¶

In [55]:
plt.figure(figsize=(10, 6))
sns.kdeplot(data=df, x='income_annum', hue='loan_status', fill=True, common_norm=False)
plt.title('Gelir Dağılımı (KDE)')
plt.xlabel('Gelir (Yıllık)')
plt.ylabel('Yoğunluk')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_data = data.groupby(
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
No description has been provided for this image

Kredi Miktarı Dağılımı (KDE)¶

In [56]:
plt.figure(figsize=(10, 6))
sns.kdeplot(data=df, x='loan_amount', hue='education', fill=True, common_norm=False)
plt.title('Kredi Miktarı Dağılımı (KDE)')
plt.xlabel('Kredi Miktarı')
plt.ylabel('Yoğunluk')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_data = data.groupby(
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
No description has been provided for this image

Cibil Skoru Dağılımı (KDE)¶

In [57]:
plt.figure(figsize=(10, 6))
sns.kdeplot(data=df, x='cibil_score', hue='loan_status', fill=True, common_norm=False)
plt.title('Cibil Skoru Dağılımı (KDE)')
plt.xlabel('Cibil Skoru')
plt.ylabel('Yoğunluk')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_data = data.groupby(
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
No description has been provided for this image

boxplot¶

In [58]:
# Eğitim seviyesine göre cibil skorlarının dağılımını görselleştirme
sns.boxplot(data=df, x='education', y='cibil_score')
plt.title('Eğitim Seviyesine Göre Cibil Skorları Dağılımı')
plt.xlabel('Eğitim Seviyesi')
plt.ylabel('Cibil Skoru')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_vals = vals.groupby(grouper)
No description has been provided for this image

Eğitim Seviyesine Göre Gelir Dağılımı¶

In [59]:
plt.figure(figsize=(10, 6))
sns.boxplot(data=df, x='education', y='income_annum')
plt.title('Eğitim Seviyesine Göre Gelir Dağılımı')
plt.xlabel('Eğitim Seviyesi')
plt.ylabel('Gelir (Yıllık)')
plt.xticks(rotation=45)
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_vals = vals.groupby(grouper)
No description has been provided for this image

Kredi Durumu ve Eğitim Seviyesine Göre Kredi Miktarı Dağılımı¶

In [60]:
plt.figure(figsize=(10, 6))
sns.boxplot(data=df, x='loan_status', y='loan_amount', hue='education')
plt.title('Kredi Durumu ve Eğitim Seviyesine Göre Kredi Miktarı Dağılımı')
plt.xlabel('Kredi Durumu')
plt.ylabel('Kredi Miktarı')
plt.xticks(rotation=45)
plt.legend(title='Eğitim Seviyesi')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_vals = vals.groupby(grouper)
No description has been provided for this image

Kredi Durumu ve Eğitim Seviyesine Göre Cibil Skoru Dağılımı¶

In [61]:
plt.figure(figsize=(10, 6))
sns.boxplot(data=df, x='loan_status', y='cibil_score', hue='education')
plt.title('Kredi Durumu ve Eğitim Seviyesine Göre Cibil Skoru Dağılımı')
plt.xlabel('Kredi Durumu')
plt.ylabel('Cibil Skoru')
plt.xticks(rotation=45)
plt.legend(title='Eğitim Seviyesi')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_vals = vals.groupby(grouper)
No description has been provided for this image

scatterplot¶

In [62]:
# Kredi miktarı ile banka varlıkları arasındaki ilişkiyi görselleştirme
sns.scatterplot(data=df, x='loan_amount', y='bank_asset_value')
plt.title('Kredi Miktarı ile Banka Varlıkları Arasındaki İlişki')
plt.xlabel('Kredi Miktarı')
plt.ylabel('Banka Varlıkları')
plt.show()
No description has been provided for this image

Gelir ve Kredi Miktarı Arasındaki İlişki¶

In [63]:
plt.figure(figsize=(10, 6))
sns.scatterplot(data=df, x='income_annum', y='loan_amount', hue='loan_status')
plt.title('Gelir ve Kredi Miktarı Arasındaki İlişki')
plt.xlabel('Gelir (Yıllık)')
plt.ylabel('Kredi Miktarı')
plt.legend(title='Kredi Durumu')
plt.show()
No description has been provided for this image

Cibil Skoru ve Kredi Miktarı Arasındaki İlişki¶

In [64]:
plt.figure(figsize=(10, 6))
sns.scatterplot(data=df, x='cibil_score', y='loan_amount', hue='education')
plt.title('Cibil Skoru ve Kredi Miktarı Arasındaki İlişki')
plt.xlabel('Cibil Skoru')
plt.ylabel('Kredi Miktarı')
plt.legend(title='Eğitim Seviyesi')
plt.show()
No description has been provided for this image

Bağımlı Sayısı ve Kredi Miktarı Arasındaki İlişki¶

In [65]:
plt.figure(figsize=(10, 6))
sns.scatterplot(data=df, x='no_of_dependents', y='loan_amount', hue='self_employed')
plt.title('Bağımlı Sayısı ve Kredi Miktarı Arasındaki İlişki')
plt.xlabel('Bağımlı Sayısı')
plt.ylabel('Kredi Miktarı')
plt.legend(title='Meslek Durumu')
plt.show()
No description has been provided for this image

lmplot¶

In [66]:
# Gelir seviyesi ile kredi miktarı arasındaki ilişkiyi görselleştirme ve doğrusal modelleme
sns.lmplot(data=df, x='income_annum', y='loan_amount')
plt.title('Gelir Seviyesi ile Kredi Miktarı Arasındaki İlişki')
plt.xlabel('Gelir (Yıllık)')
plt.ylabel('Kredi Miktarı')
plt.show()
No description has been provided for this image

Gelir ve Kredi Miktarı İlişkisi (Lineer Model)¶

In [67]:
sns.lmplot(data=df, x='income_annum', y='loan_amount', hue='loan_status', height=6)
plt.title('Gelir ve Kredi Miktarı İlişkisi (Lineer Model)')
plt.xlabel('Gelir (Yıllık)')
plt.ylabel('Kredi Miktarı')
plt.show()
No description has been provided for this image

Cibil Skoru ve Kredi Miktarı İlişkisi (Lineer Model)¶

In [68]:
sns.lmplot(data=df, x='cibil_score', y='loan_amount', hue='education', height=6)
plt.title('Cibil Skoru ve Kredi Miktarı İlişkisi (Lineer Model)')
plt.xlabel('Cibil Skoru')
plt.ylabel('Kredi Miktarı')
plt.show()
No description has been provided for this image

Bağımlı Sayısı ve Kredi Miktarı İlişkisi (Lineer Model)¶

In [69]:
sns.lmplot(data=df, x='no_of_dependents', y='loan_amount', hue='self_employed', height=6)
plt.title('Bağımlı Sayısı ve Kredi Miktarı İlişkisi (Lineer Model)')
plt.xlabel('Bağımlı Sayısı')
plt.ylabel('Kredi Miktarı')
plt.show()
No description has been provided for this image

pairplot¶

In [27]:
# Tüm sayısal değişkenler arasındaki ilişkiyi görselleştirme
sns.pairplot(df)
plt.title('Tüm Sayısal Değişkenler Arasındaki İlişki')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
No description has been provided for this image

Sayısal Değişkenler Arasındaki İlişkiler¶

In [30]:
sns.pairplot(df[['income_annum', 'loan_amount', 'cibil_score']])
plt.title('Sayısal Değişkenler Arasındaki İlişkiler')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
No description has been provided for this image

Sayısal Değişkenler ve Kredi Durumu Arasındaki İlişkiler¶

In [32]:
sns.pairplot(df[['income_annum', 'loan_amount', 'no_of_dependents', 'loan_status']], hue='loan_status')
plt.title('Sayısal Değişkenler ve Kredi Durumu Arasındaki İlişkiler')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_data = data.groupby(
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_data = data.groupby(
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_data = data.groupby(
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
No description has been provided for this image

Kredi Miktarı, Cibil Skoru ve Bağımlı Sayısı Arasındaki İlişkiler¶

In [35]:
sns.pairplot(df[['loan_amount', 'cibil_score', 'no_of_dependents','education']], hue='education')
plt.title('Kredi Miktarı, Cibil Skoru ve Bağımlı Sayısı Arasındaki İlişkiler')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_data = data.groupby(
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_data = data.groupby(
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  grouped_data = data.groupby(
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
  data_subset = grouped_data.get_group(pd_key)
No description has been provided for this image

pivot¶

Eğitim Seviyesi ve Kredi Durumuna Göre Ortalama Kredi Miktarı¶

In [38]:
pivot_table = df.pivot_table(index='education', columns='loan_status', values='loan_amount', aggfunc='mean')
pivot_table.plot(kind='bar', figsize=(10, 6))
plt.title('Eğitim Seviyesi ve Kredi Durumuna Göre Ortalama Kredi Miktarı')
plt.xlabel('Eğitim Seviyesi')
plt.ylabel('Ortalama Kredi Miktarı')
plt.xticks(rotation=45)
plt.legend(title='Kredi Durumu')
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_17764\364336002.py:1: FutureWarning: The default value of observed=False is deprecated and will change to observed=True in a future version of pandas. Specify observed=False to silence this warning and retain the current behavior
  pivot_table = df.pivot_table(index='education', columns='loan_status', values='loan_amount', aggfunc='mean')
No description has been provided for this image

Meslek Durumu ve Kredi Durumuna Göre Ortalama Gelir¶

In [40]:
pivot_table2 = df.pivot_table(index='self_employed', columns='loan_status', values='income_annum', aggfunc='mean')
pivot_table2.plot(kind='bar', figsize=(10, 6))
plt.title('Meslek Durumu ve Kredi Durumuna Göre Ortalama Gelir')
plt.xlabel('Meslek Durumu')
plt.ylabel('Ortalama Gelir (Yıllık)')
plt.xticks(rotation=45)
plt.legend(title='Kredi Durumu')
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_17764\980057846.py:1: FutureWarning: The default value of observed=False is deprecated and will change to observed=True in a future version of pandas. Specify observed=False to silence this warning and retain the current behavior
  pivot_table2 = df.pivot_table(index='self_employed', columns='loan_status', values='income_annum', aggfunc='mean')
No description has been provided for this image

Kredi Durumu ve Eğitim Seviyesine Göre Ortalama Cibil Skoru¶

In [42]:
pivot_table3 = df.pivot_table(index='loan_status', columns='education', values='cibil_score', aggfunc='mean')
pivot_table3.plot(kind='bar', figsize=(10, 6))
plt.title('Kredi Durumu ve Eğitim Seviyesine Göre Ortalama Cibil Skoru')
plt.xlabel('Kredi Durumu')
plt.ylabel('Ortalama Cibil Skoru')
plt.xticks(rotation=45)
plt.legend(title='Eğitim Seviyesi')
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_17764\4131333369.py:1: FutureWarning: The default value of observed=False is deprecated and will change to observed=True in a future version of pandas. Specify observed=False to silence this warning and retain the current behavior
  pivot_table3 = df.pivot_table(index='loan_status', columns='education', values='cibil_score', aggfunc='mean')
No description has been provided for this image

heatmap¶

Değişkenler Arasındaki Korelasyon¶

In [45]:
correlation_matrix = num_columns.corr()
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Değişkenler Arasındaki Korelasyon')
plt.show()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[45], line 1
----> 1 correlation_matrix = num_columns.corr()
      2 plt.figure(figsize=(10, 8))
      3 sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')

NameError: name 'num_columns' is not defined

Eğitim ve Meslek Durumuna Göre Ortalama Kredi Miktarı¶

In [52]:
plt.figure(figsize=(10, 8))
sns.heatmap(df.pivot_table(index='education', columns='self_employed', values='loan_amount', aggfunc='mean'), annot=True, cmap='viridis')
plt.title('Eğitim ve Meslek Durumuna Göre Ortalama Kredi Miktarı')
plt.xlabel('Meslek Durumu')
plt.ylabel('Eğitim Seviyesi')
plt.xticks(rotation=45)
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_17764\619796474.py:2: FutureWarning: The default value of observed=False is deprecated and will change to observed=True in a future version of pandas. Specify observed=False to silence this warning and retain the current behavior
  sns.heatmap(df.pivot_table(index='education', columns='self_employed', values='loan_amount', aggfunc='mean'), annot=True, cmap='viridis')
No description has been provided for this image

Kredi Durumu ve Eğitim Seviyesine Göre Ortalama Cibil Skoru¶

In [54]:
plt.figure(figsize=(10, 8))
sns.heatmap(df.pivot_table(index='loan_status', columns='education', values='cibil_score', aggfunc='mean'), annot=True, cmap='rocket')
plt.title('Kredi Durumu ve Eğitim Seviyesine Göre Ortalama Cibil Skoru')
plt.xlabel('Eğitim Seviyesi')
plt.ylabel('Kredi Durumu')
plt.xticks(rotation=45)
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_17764\2180551148.py:2: FutureWarning: The default value of observed=False is deprecated and will change to observed=True in a future version of pandas. Specify observed=False to silence this warning and retain the current behavior
  sns.heatmap(df.pivot_table(index='loan_status', columns='education', values='cibil_score', aggfunc='mean'), annot=True, cmap='rocket')
No description has been provided for this image

lineplot¶

Kredi Süresi ve Kredi Miktarı İlişkisi¶

In [ ]:
plt.figure(figsize=(10, 6))
sns.lineplot(data=df, x='loan_term', y='loan_amount', hue='loan_status')
plt.title('Kredi Süresi ve Kredi Miktarı İlişkisi')
plt.xlabel('Kredi Süresi (Ay)')
plt.ylabel('Kredi Miktarı')
plt.legend(title='Kredi Durumu')
plt.show()

Cibil Skoru ve Kredi Miktarı İlişkisi¶

In [ ]:
plt.figure(figsize=(10, 6))
sns.lineplot(data=df, x='cibil_score', y='loan_amount', hue='education')
plt.title('Cibil Skoru ve Kredi Miktarı İlişkisi')
plt.xlabel('Cibil Skoru')
plt.ylabel('Kredi Miktarı')
plt.legend(title='Eğitim Seviyesi')
plt.show()

Kredi Miktarı ve Gelir İlişkisi¶

In [ ]:
plt.figure(figsize=(10, 6))
sns.lineplot(data=df, x='loan_amount', y='income_annum', hue='loan_status')
plt.title('Kredi Miktarı ve Gelir İlişkisi')
plt.xlabel('Kredi Miktarı')
plt.ylabel('Gelir (Yıllık)')
plt.legend(title='Kredi Durumu')
plt.show()

İstatistik¶

1.Örneklem¶

In [ ]:
# Veri setinin %10'luk bir örneklemini alma
sample_df = df.sample(frac=0.1, random_state=1)

# Örneklem verisini görüntüleme
print(sample_df.head())

2.Betimsel İstatistik¶

Varyans Hesaplama Öncelikle, örneklem verimizin belirli sütunları için varyans hesaplayalım. Varyans, bir veri kümesinin yayılımını ölçen bir istatistiksel değerdir.

In [ ]:
# Varyans hesaplama
variances = sample_df.var(numeric_only=True)
print("Varyanslar:")
print(variances)

Kovaryans Hesaplama Kovaryans, iki değişkenin birlikte nasıl değiştiğini ölçer. Tüm çiftler için kovaryans matrisini hesaplayabiliriz.

In [ ]:
# Kovaryans matrisini hesaplama
cov_matrix = sample_df.cov(numeric_only=True)
print("Kovaryans Matrisi:")
print(cov_matrix)

Korelasyon Hesaplama Korelasyon, iki değişken arasındaki doğrusal ilişkiyi ölçer. Tüm çiftler için korelasyon matrisini hesaplayabiliriz.

In [ ]:
# Korelasyon matrisini hesaplama
corr_matrix = sample_df.corr(numeric_only=True)
print("Korelasyon Matrisi:")
print(corr_matrix)

3.Fiyat Stratejisi Karar Destek¶

Bu adımda, genellikle gelir ve kredi miktarı gibi ekonomik değişkenlerin analizi yapılır. Bu analizde, örneklem verimizdeki bazı sütunların istatistiksel özetlerini kullanarak karar destek sistemi oluşturabiliriz.

In [ ]:
# Gelir ve kredi miktarının özet istatistikleri
income_stats = sample_df['income_annum'].describe()
loan_amount_stats = sample_df['loan_amount'].describe()

print("Gelir İstatistikleri:")
print(income_stats)

print("\nKredi Miktarı İstatistikleri:")
print(loan_amount_stats)

Gelir ve kredi miktarı arasındaki korelasyonu hesaplayalım:

In [ ]:
# Gelir ve kredi miktarı arasındaki korelasyon
income_loan_corr = sample_df['income_annum'].corr(sample_df['loan_amount'])
print("\nGelir ve Kredi Miktarı Arasındaki Korelasyon: {:.2f}".format(income_loan_corr))

4.Olasılık Dağılımları¶

Bernoulli Dağılımı¶

Bernoulli dağılımı, iki olası sonuç (başarı ve başarısızlık) içeren deneyler için kullanılır. self_employed veya loan_status sütunları, Bernoulli dağılımı için uygun olabilir.

In [ ]:
from scipy.stats import bernoulli

# self_employed sütunu için Bernoulli dağılımı
p_self_employed = sample_df['self_employed'].value_counts(normalize=True)[' Yes']
bernoulli_dist = bernoulli(p_self_employed)

print("Bernoulli Dağılımı (self_employed 'Yes' olma olasılığı): {:.2f}".format(p_self_employed))
print("Beklenen değer (mean): {:.2f}".format(bernoulli_dist.mean()))
print("Varyans: {:.2f}".format(bernoulli_dist.var()))
Binom Dağılımı¶

Binom dağılımı, belirli bir sayıda bağımsız Bernoulli denemesinin sonuçlarını modellemek için kullanılır.

In [ ]:
from scipy.stats import binom

# Örnek: 10 denemede self_employed olanların sayısı
n = 10
binom_dist = binom(n, p_self_employed)

print("Binom Dağılımı (10 denemede 'Yes' sayısı):")
print("Beklenen değer (mean): {:.2f}".format(binom_dist.mean()))
print("Varyans: {:.2f}".format(binom_dist.var()))
for k in range(n + 1):
    print(f"10 denemede {k} kez 'Yes' olma olasılığı: {binom_dist.pmf(k):.4f}")
Geometrik Dağılım¶

Geometrik dağılım, ilk başarıya kadar geçen deneme sayısını modellemek için kullanılır.

In [ ]:
from scipy.stats import geom

geom_dist = geom(p_self_employed)

print("Geometrik Dağılım (ilk 'Yes' olana kadar geçen deneme sayısı):")
print("Beklenen değer (mean): {:.2f}".format(geom_dist.mean()))
print("Varyans: {:.2f}".format(geom_dist.var()))
for k in range(1, 11):
    print(f"İlk 'Yes' için {k}. deneme olasılığı: {geom_dist.pmf(k):.4f}")
Poisson Dağılımı¶

Poisson dağılımı, belirli bir zaman aralığında belirli olayların sayısını modellemek için kullanılır.

In [ ]:
from scipy.stats import poisson

# Örnek: Saatte 2.5 kredi başvurusu ortalaması ile Poisson dağılımı
lambda_ = 2.5
poisson_dist = poisson(mu=lambda_)

print("Poisson Dağılımı (ortalama 2.5 kredi başvurusu):")
print("Beklenen değer (mean): {:.2f}".format(poisson_dist.mean()))
print("Varyans: {:.2f}".format(poisson_dist.var()))
for k in range(10):
    print(f"{k} kredi başvurusu olasılığı: {poisson_dist.pmf(k):.4f}")
Normal Dağılım¶

Normal dağılım, sürekli veri için en yaygın dağılımdır. income_annum veya loan_amount sütunları normal dağılımı analiz etmek için kullanılabilir.

In [ ]:
from scipy.stats import norm

# Gelir verisi için normal dağılım
mean_income = sample_df['income_annum'].mean()
std_income = sample_df['income_annum'].std()
norm_dist = norm(mean_income, std_income)

print("Normal Dağılım (gelir verisi):")
print("Beklenen değer (mean): {:.2f}".format(norm_dist.mean()))
print("Varyans: {:.2f}".format(norm_dist.var()))
print(f"68% olasılıkla gelir {norm_dist.ppf(0.16):.2f} ile {norm_dist.ppf(0.84):.2f} arasında olacaktır.")
Üssel Dağılım¶

Üssel dağılım, olaylar arasındaki süreyi modellemek için kullanılır.

In [ ]:
from scipy.stats import expon

# Örnek: Ortalama 1 kredi başvuru süresi ile üssel dağılım
lambda_exp = 1.0
expon_dist = expon(scale=1/lambda_exp)

print("Üssel Dağılım (ortalama 1 kredi başvuru süresi):")
print("Beklenen değer (mean): {:.2f}".format(expon_dist.mean()))
print("Varyans: {:.2f}".format(expon_dist.var()))
for k in range(1, 11):
    print(f"{k} zaman birimi içinde olay gerçekleşme olasılığı: {expon_dist.cdf(k):.4f}")

5.Hipotez Testi¶

Tek Örneklem T Testi¶

Tek örneklem T testi, bir örneklem ortalamasının bilinen bir popülasyon ortalamasına eşit olup olmadığını test eder. Örneğin, gelir verisinin belirli bir değere eşit olup olmadığını test edebiliriz.

In [ ]:
from scipy.stats import ttest_1samp

# Örnek: Gelir verisinin ortalamasının 5,000,000'a eşit olup olmadığını test etme
popmean = 5000000
t_stat, p_value = ttest_1samp(sample_df['income_annum'], popmean)

print("Tek Örneklem T Testi:")
print("T-istatistiği: {:.2f}".format(t_stat))
print("p-değeri: {:.4f}".format(p_value))
Bağımsız İki Örneklem T Testi (AB Testi)¶

Bu test, iki bağımsız örneklemin ortalamalarının eşit olup olmadığını test eder. Örneğin, self_employed olan ve olmayan kişilerin gelir ortalamalarını karşılaştırabiliriz.

In [ ]:
from scipy.stats import ttest_ind

# Örnek: self_employed olan ve olmayan kişilerin gelir ortalamalarını karşılaştırma
income_self_employed = sample_df[sample_df['self_employed'] == ' Yes']['income_annum']
income_not_self_employed = sample_df[sample_df['self_employed'] == ' No']['income_annum']

t_stat, p_value = ttest_ind(income_self_employed, income_not_self_employed, equal_var=False)

print("Bağımsız İki Örneklem T Testi (self_employed 'Yes' ve 'No' için):")
print("T-istatistiği: {:.2f}".format(t_stat))
print("p-değeri: {:.4f}".format(p_value))
İki Örneklem Oran Testi¶

Bu test, iki oran arasındaki farkın istatistiksel olarak anlamlı olup olmadığını test eder. Örneğin, loan_status oranlarını karşılaştırabiliriz.

In [ ]:
from scipy.stats import chi2_contingency

# 'Approved' ve 'Rejected' gruplarının gözlem sayılarını alalım
approved_count = sample_df['loan_status'].value_counts()[' Approved']
rejected_count = sample_df['loan_status'].value_counts()[' Rejected']

# Toplam gözlem sayısını bulalım
total_obs = len(sample_df)

# Beklenen frekansları hesaplayalım (Approved oranı * toplam gözlem sayısı)
expected_approved = total_obs * (approved_count / total_obs)
expected_rejected = total_obs * (rejected_count / total_obs)

# Gözlemlenen ve beklenen frekansları içeren bir tabloyu oluşturalım
observed = [approved_count, rejected_count]
expected = [expected_approved, expected_rejected]

# Chi-kare testini yapalım
chi2_stat, p_value, dof, expected = chi2_contingency([observed, expected])

print("Chi-kare Testi:")
print("Chi-kare istatistiği: {:.2f}".format(chi2_stat))
print("p-değeri: {:.4f}".format(p_value))

Chi-kare testi sonucu istatistiksel olarak anlamlı bir ilişki tespit etmediğini gösteriyor. P-değeri 0.05'ten büyük olduğu için, 'Approved' ve 'Rejected' grupları arasında anlamlı bir fark olmadığını söyleyebiliriz.

Bu sonuç, 'loan_status' değişkeninin 'Approved' ve 'Rejected' grupları arasında dağılımının rastgele olmadığını, yani belirli bir tür kredi başvurusunun diğerinden daha yaygın olmadığını gösteriyor.

Eğer başka bir analiz veya sorunuz varsa, lütfen belirtin, yardımcı olmaktan memnuniyet duyarım.

Varyans Analizi (ANOVA)¶

ANOVA, üç veya daha fazla grup ortalamaları arasındaki farkları test eder. Örneğin, education değişkenine göre gelir farklılıklarını test edebiliriz.

In [ ]:
from scipy.stats import f_oneway

# Örnek: education değişkenine göre gelir farklılıkları
income_graduate = sample_df[sample_df['education'] == ' Graduate']['income_annum']
income_not_graduate = sample_df[sample_df['education'] == ' Not Graduate']['income_annum']

f_stat, p_value = f_oneway(income_graduate, income_not_graduate)

print("Varyans Analizi (ANOVA):")
print("F-istatistiği: {:.2f}".format(f_stat))
print("p-değeri: {:.4f}".format(p_value))
Korelasyon Analizi¶

Korelasyon analizi, iki değişken arasındaki ilişkinin gücünü ve yönünü belirler. Örneğin, gelir ve kredi miktarı arasındaki korelasyonu analiz edebiliriz.

In [ ]:
# Gelir ve kredi miktarı arasındaki korelasyon
income_loan_corr = sample_df['income_annum'].corr(sample_df['loan_amount'])
print("\nGelir ve Kredi Miktarı Arasındaki Korelasyon: {:.2f}".format(income_loan_corr))

Veri Ön İşleme

loan_id değerine ihtiyacımız olmayacağı için o sütunu siliyorum ve category türündeki değişkenlerimi sınıflandırma problemine hazırlayabilmek adına sayısal verilere dönüştüreceğim.¶
In [56]:
#df_number = df.drop(columns=['loan_id','income_group'],inplace=False)
df_number = df.drop(columns=['loan_id'],inplace=False)
df_number = df_number.select_dtypes(include=["int64"])
#df_category = df.drop(columns=['loan_id','income_group'],inplace=False)
df_category = df.drop(columns=['loan_id'],inplace=False)
df_category = df_category.select_dtypes(include=["category"])
In [58]:
df_number.head()
Out[58]:
no_of_dependents income_annum loan_amount loan_term cibil_score residential_assets_value commercial_assets_value luxury_assets_value bank_asset_value
0 2 9600000 29900000 12 778 2400000 17600000 22700000 8000000
1 0 4100000 12200000 8 417 2700000 2200000 8800000 3300000
2 3 9100000 29700000 20 506 7100000 4500000 33300000 12800000
3 3 8200000 30700000 8 467 18200000 3300000 23300000 7900000
4 5 9800000 24200000 20 382 12400000 8200000 29400000 5000000
In [60]:
df_category.head()
Out[60]:
education self_employed loan_status
0 Graduate No Approved
1 Not Graduate Yes Rejected
2 Graduate No Rejected
3 Graduate No Rejected
4 Not Graduate Yes Rejected

Aykırı Gözlem analizi (Outlier Analysis)

In [62]:
import seaborn as sns
import matplotlib.pyplot as plt

# Tüm sayısal değişkenler için kutu grafiği oluşturma
plt.figure(figsize=(13,12))
for i, col in enumerate(df_number, 1):
    plt.subplot(3, 4, i)
    sns.boxplot(x=df_number[col])
    plt.title('Boxplot for {}'.format(col))
plt.tight_layout()
plt.show()
No description has been provided for this image
In [64]:
# "residential_assets_value" ve "commercial_assets_value" değişkenlerinin histogramları ve kutu grafikleri
plt.figure(figsize=(15, 6))

# residential_assets_value histogram ve kutu grafiği
plt.subplot(1, 2, 1)
sns.histplot(df_number['residential_assets_value'], kde=True)
plt.title('Histogram for residential_assets_value')

plt.subplot(1, 2, 2)
sns.boxplot(x=df_number['residential_assets_value'])
plt.title('Boxplot for residential_assets_value')

plt.tight_layout()
plt.show()

# commercial_assets_value histogram ve kutu grafiği
plt.figure(figsize=(15, 6))
plt.subplot(1, 2, 1)
sns.histplot(df_number['commercial_assets_value'], kde=True)
plt.title('Histogram for commercial_assets_value')

plt.subplot(1, 2, 2)
sns.boxplot(x=df_number['commercial_assets_value'])
plt.title('Boxplot for commercial_assets_value')

plt.tight_layout()
plt.show()

# "residential_assets_value" ve "commercial_assets_value" değişkenlerinin histogramları ve kutu grafikleri
plt.figure(figsize=(15, 6))

# residential_assets_value histogram ve kutu grafiği
plt.subplot(1, 2, 1)
sns.histplot(df_number['bank_asset_value'], kde=True)
plt.title('Histogram for bank_asset_value')

plt.subplot(1, 2, 2)
sns.boxplot(x=df_number['bank_asset_value'])
plt.title('Boxplot for bank_asset_value')

plt.tight_layout()
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
No description has been provided for this image
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
No description has been provided for this image
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
No description has been provided for this image
In [65]:
import seaborn as sns
import matplotlib.pyplot as plt

# "residential_assets_value", "commercial_assets_value" ve "bank_assets_value" değişkenlerinin boxplotları
plt.figure(figsize=(18, 6))

# residential_assets_value boxplot
plt.subplot(1, 3, 1)
sns.boxplot(x=df['residential_assets_value'])
plt.title('Boxplot for residential_assets_value')

# commercial_assets_value boxplot
plt.subplot(1, 3, 2)
sns.boxplot(x=df['commercial_assets_value'])
plt.title('Boxplot for commercial_assets_value')

# bank_assets_value boxplot
plt.subplot(1, 3, 3)
sns.boxplot(x=df['bank_asset_value'])
plt.title('Boxplot for bank_asset_value')

plt.tight_layout()
plt.show()
No description has been provided for this image
1.residential_assets_value (ortalama değer ile aykırı değer çözme)¶
In [69]:
df_number_residential_assets_value = df_number["residential_assets_value"]
In [71]:
df_number_residential_assets_value.head()
Out[71]:
0     2400000
1     2700000
2     7100000
3    18200000
4    12400000
Name: residential_assets_value, dtype: int64
In [73]:
sns.boxplot(x = df_number_residential_assets_value);
No description has been provided for this image
In [75]:
Q1 = df_number_residential_assets_value.quantile(0.25)
Q3 = df_number_residential_assets_value.quantile(0.75)
IQR = Q3-Q1
In [77]:
print(Q1)
print(Q3)
print(IQR)
2200000.0
11300000.0
9100000.0
In [79]:
alt_sinir = Q1- 1.5*IQR
ust_sinir = Q3 + 1.5*IQR
In [81]:
print(alt_sinir)
print(ust_sinir)
-11450000.0
24950000.0
In [83]:
(df_number_residential_assets_value < alt_sinir) | (df_number_residential_assets_value > ust_sinir)
Out[83]:
0       False
1       False
2       False
3       False
4       False
        ...  
4264    False
4265    False
4266    False
4267    False
4268    False
Name: residential_assets_value, Length: 4269, dtype: bool
In [85]:
aykiri_tf = (df_number_residential_assets_value < alt_sinir) | (df_number_residential_assets_value > ust_sinir)
df_number_residential_assets_value[aykiri_tf]
Out[85]:
82      25500000
98      25900000
123     26800000
228     25500000
262     25600000
693     25400000
714     28300000
781     26300000
892     27600000
905     28700000
919     25300000
924     25800000
953     25500000
956     25300000
987     28200000
997     25200000
1002    25800000
1397    25700000
1419    25500000
1468    27000000
1591    28500000
1625    26300000
1965    28400000
1997    27000000
2185    25900000
2318    28500000
2384    26600000
2412    26200000
2586    28000000
2715    25800000
2818    25500000
2828    26900000
2922    25200000
2927    27600000
2930    25300000
2940    26100000
3119    29100000
3157    25400000
3234    28200000
3310    25100000
3498    25600000
3631    25400000
3763    26100000
3782    27500000
3860    25000000
3868    25500000
3872    25400000
3880    26200000
4027    25100000
4042    27400000
4074    27300000
4237    26200000
Name: residential_assets_value, dtype: int64
In [87]:
df_number_residential_assets_value[aykiri_tf].index
Out[87]:
Index([  82,   98,  123,  228,  262,  693,  714,  781,  892,  905,  919,  924,
        953,  956,  987,  997, 1002, 1397, 1419, 1468, 1591, 1625, 1965, 1997,
       2185, 2318, 2384, 2412, 2586, 2715, 2818, 2828, 2922, 2927, 2930, 2940,
       3119, 3157, 3234, 3310, 3498, 3631, 3763, 3782, 3860, 3868, 3872, 3880,
       4027, 4042, 4074, 4237],
      dtype='int64')
In [89]:
df_number_residential_assets_value = pd.DataFrame(df_number_residential_assets_value)
df_number_residential_assets_value.shape
Out[89]:
(4269, 1)
In [91]:
df_number_residential_assets_value.mean()
Out[91]:
residential_assets_value    7.472617e+06
dtype: float64
In [93]:
df_number.loc[aykiri_tf, 'residential_assets_value'] = df_number['residential_assets_value'].mean()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_17764\1074059456.py:1: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '7472616.537830873' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df_number.loc[aykiri_tf, 'residential_assets_value'] = df_number['residential_assets_value'].mean()
In [95]:
df_number
Out[95]:
no_of_dependents income_annum loan_amount loan_term cibil_score residential_assets_value commercial_assets_value luxury_assets_value bank_asset_value
0 2 9600000 29900000 12 778 2400000.0 17600000 22700000 8000000
1 0 4100000 12200000 8 417 2700000.0 2200000 8800000 3300000
2 3 9100000 29700000 20 506 7100000.0 4500000 33300000 12800000
3 3 8200000 30700000 8 467 18200000.0 3300000 23300000 7900000
4 5 9800000 24200000 20 382 12400000.0 8200000 29400000 5000000
... ... ... ... ... ... ... ... ... ...
4264 5 1000000 2300000 12 317 2800000.0 500000 3300000 800000
4265 0 3300000 11300000 20 559 4200000.0 2900000 11000000 1900000
4266 2 6500000 23900000 18 457 1200000.0 12400000 18100000 7300000
4267 1 4100000 12800000 8 780 8200000.0 700000 14100000 5800000
4268 1 9200000 29700000 10 607 17800000.0 11800000 35700000 12000000

4269 rows × 9 columns

In [97]:
sns.boxplot(x = df_number.residential_assets_value);
No description has been provided for this image
2.commercial_assets_value (ortalama değer ile aykırı değer çözme)¶
In [100]:
df_number_commercial_assets_value = df_number["commercial_assets_value"]
In [102]:
df_number_commercial_assets_value.head()
Out[102]:
0    17600000
1     2200000
2     4500000
3     3300000
4     8200000
Name: commercial_assets_value, dtype: int64
In [104]:
sns.boxplot(x = df_number_commercial_assets_value);
No description has been provided for this image
In [106]:
Q1 = df_number_commercial_assets_value.quantile(0.25)
Q3 = df_number_commercial_assets_value.quantile(0.75)
IQR = Q3-Q1
In [108]:
print(Q1)
print(Q3)
print(IQR)
1300000.0
7600000.0
6300000.0
In [110]:
alt_sinir = Q1- 1.5*IQR
ust_sinir = Q3 + 1.5*IQR
In [112]:
print(alt_sinir)
print(ust_sinir)
-8150000.0
17050000.0
In [114]:
(df_number_commercial_assets_value < alt_sinir) | (df_number_commercial_assets_value > ust_sinir)
Out[114]:
0        True
1       False
2       False
3       False
4       False
        ...  
4264    False
4265    False
4266    False
4267    False
4268    False
Name: commercial_assets_value, Length: 4269, dtype: bool
In [116]:
aykiri_tf = (df_number_commercial_assets_value < alt_sinir) | (df_number_commercial_assets_value > ust_sinir)
df_number_commercial_assets_value[aykiri_tf]
Out[116]:
0       17600000
157     18700000
231     17500000
258     19000000
323     17200000
367     18500000
554     18800000
791     17800000
895     18500000
905     17900000
1029    18300000
1131    17300000
1194    17300000
1254    18900000
1272    18400000
1304    18200000
1609    17300000
1761    19000000
1768    19400000
1812    17800000
2004    17500000
2302    17900000
2349    17400000
2914    18500000
2933    17600000
2940    19000000
2976    17700000
3000    18800000
3439    18400000
3541    17200000
3790    18400000
3827    17700000
3882    18500000
3949    17600000
4010    17600000
4120    17900000
4205    17600000
Name: commercial_assets_value, dtype: int64
In [118]:
df_number_commercial_assets_value[aykiri_tf].index
Out[118]:
Index([   0,  157,  231,  258,  323,  367,  554,  791,  895,  905, 1029, 1131,
       1194, 1254, 1272, 1304, 1609, 1761, 1768, 1812, 2004, 2302, 2349, 2914,
       2933, 2940, 2976, 3000, 3439, 3541, 3790, 3827, 3882, 3949, 4010, 4120,
       4205],
      dtype='int64')
In [120]:
df_number_commercial_assets_value = pd.DataFrame(df_number_commercial_assets_value)
df_number_commercial_assets_value.shape
Out[120]:
(4269, 1)
In [122]:
df_number_commercial_assets_value.mean()
Out[122]:
commercial_assets_value    4.973155e+06
dtype: float64
In [124]:
df_number.loc[aykiri_tf, 'commercial_assets_value'] = df_number['commercial_assets_value'].mean()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_17764\2185203214.py:1: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '4973155.3056922' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df_number.loc[aykiri_tf, 'commercial_assets_value'] = df_number['commercial_assets_value'].mean()
In [126]:
df_number
Out[126]:
no_of_dependents income_annum loan_amount loan_term cibil_score residential_assets_value commercial_assets_value luxury_assets_value bank_asset_value
0 2 9600000 29900000 12 778 2400000.0 4.973155e+06 22700000 8000000
1 0 4100000 12200000 8 417 2700000.0 2.200000e+06 8800000 3300000
2 3 9100000 29700000 20 506 7100000.0 4.500000e+06 33300000 12800000
3 3 8200000 30700000 8 467 18200000.0 3.300000e+06 23300000 7900000
4 5 9800000 24200000 20 382 12400000.0 8.200000e+06 29400000 5000000
... ... ... ... ... ... ... ... ... ...
4264 5 1000000 2300000 12 317 2800000.0 5.000000e+05 3300000 800000
4265 0 3300000 11300000 20 559 4200000.0 2.900000e+06 11000000 1900000
4266 2 6500000 23900000 18 457 1200000.0 1.240000e+07 18100000 7300000
4267 1 4100000 12800000 8 780 8200000.0 7.000000e+05 14100000 5800000
4268 1 9200000 29700000 10 607 17800000.0 1.180000e+07 35700000 12000000

4269 rows × 9 columns

In [128]:
sns.boxplot(x = df_number.commercial_assets_value);
No description has been provided for this image
3.bank_asset_value (ortalama değer ile aykırı değer çözme)¶
In [131]:
df_number_bank_asset_value = df_number["bank_asset_value"]
In [133]:
df_number_bank_asset_value.head()
Out[133]:
0     8000000
1     3300000
2    12800000
3     7900000
4     5000000
Name: bank_asset_value, dtype: int64
In [135]:
sns.boxplot(x = df_number_bank_asset_value);
No description has been provided for this image
In [137]:
Q1 = df_number_bank_asset_value.quantile(0.25)
Q3 = df_number_bank_asset_value.quantile(0.75)
IQR = Q3-Q1
In [139]:
print(Q1)
print(Q3)
print(IQR)
2300000.0
7100000.0
4800000.0
In [141]:
alt_sinir = Q1- 1.5*IQR
ust_sinir = Q3 + 1.5*IQR
In [143]:
print(alt_sinir)
print(ust_sinir)
-4900000.0
14300000.0
In [145]:
(df_number_bank_asset_value < alt_sinir) | (df_number_bank_asset_value > ust_sinir)
Out[145]:
0       False
1       False
2       False
3       False
4       False
        ...  
4264    False
4265    False
4266    False
4267    False
4268    False
Name: bank_asset_value, Length: 4269, dtype: bool
In [147]:
aykiri_tf = (df_number_bank_asset_value < alt_sinir) | (df_number_bank_asset_value > ust_sinir)
df_number_bank_asset_value[aykiri_tf]
Out[147]:
200     14400000
1272    14700000
1633    14600000
1674    14600000
1805    14700000
Name: bank_asset_value, dtype: int64
In [149]:
df_number_bank_asset_value[aykiri_tf].index
Out[149]:
Index([200, 1272, 1633, 1674, 1805], dtype='int64')
In [151]:
df_number_bank_asset_value = pd.DataFrame(df_number_bank_asset_value)
df_number_bank_asset_value.shape
Out[151]:
(4269, 1)
In [153]:
df_number_bank_asset_value.mean()
Out[153]:
bank_asset_value    4.976692e+06
dtype: float64
In [155]:
df_number.loc[aykiri_tf, 'bank_asset_value'] = df_number['bank_asset_value'].mean()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_17764\2284535543.py:1: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '4976692.433825252' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df_number.loc[aykiri_tf, 'bank_asset_value'] = df_number['bank_asset_value'].mean()
In [157]:
df_number
Out[157]:
no_of_dependents income_annum loan_amount loan_term cibil_score residential_assets_value commercial_assets_value luxury_assets_value bank_asset_value
0 2 9600000 29900000 12 778 2400000.0 4.973155e+06 22700000 8000000.0
1 0 4100000 12200000 8 417 2700000.0 2.200000e+06 8800000 3300000.0
2 3 9100000 29700000 20 506 7100000.0 4.500000e+06 33300000 12800000.0
3 3 8200000 30700000 8 467 18200000.0 3.300000e+06 23300000 7900000.0
4 5 9800000 24200000 20 382 12400000.0 8.200000e+06 29400000 5000000.0
... ... ... ... ... ... ... ... ... ...
4264 5 1000000 2300000 12 317 2800000.0 5.000000e+05 3300000 800000.0
4265 0 3300000 11300000 20 559 4200000.0 2.900000e+06 11000000 1900000.0
4266 2 6500000 23900000 18 457 1200000.0 1.240000e+07 18100000 7300000.0
4267 1 4100000 12800000 8 780 8200000.0 7.000000e+05 14100000 5800000.0
4268 1 9200000 29700000 10 607 17800000.0 1.180000e+07 35700000 12000000.0

4269 rows × 9 columns

In [159]:
sns.boxplot(x = df_number.bank_asset_value);
No description has been provided for this image

Kategorik Değişkenlerin Sürekliye çevrilmesi¶

In [162]:
df_category.head()
Out[162]:
education self_employed loan_status
0 Graduate No Approved
1 Not Graduate Yes Rejected
2 Graduate No Rejected
3 Graduate No Rejected
4 Not Graduate Yes Rejected
In [164]:
df_category = pd.get_dummies(df_category, columns = ["education"], prefix = ["education"])
df_category
Out[164]:
self_employed loan_status education_ Graduate education_ Not Graduate
0 No Approved True False
1 Yes Rejected False True
2 No Rejected True False
3 No Rejected True False
4 Yes Rejected False True
... ... ... ... ...
4264 Yes Rejected True False
4265 Yes Approved False True
4266 No Rejected False True
4267 No Approved False True
4268 No Approved True False

4269 rows × 4 columns

In [166]:
df_category = pd.get_dummies(df_category, columns = ["self_employed"], prefix = ["self_employed"])
df_category
Out[166]:
loan_status education_ Graduate education_ Not Graduate self_employed_ No self_employed_ Yes
0 Approved True False True False
1 Rejected False True False True
2 Rejected True False True False
3 Rejected True False True False
4 Rejected False True False True
... ... ... ... ... ...
4264 Rejected True False False True
4265 Approved False True False True
4266 Rejected False True True False
4267 Approved False True True False
4268 Approved True False True False

4269 rows × 5 columns

In [168]:
df_category = pd.get_dummies(df_category, columns = ["loan_status"], prefix = ["loan_status"])
df_category
Out[168]:
education_ Graduate education_ Not Graduate self_employed_ No self_employed_ Yes loan_status_ Approved loan_status_ Rejected
0 True False True False True False
1 False True False True False True
2 True False True False False True
3 True False True False False True
4 False True False True False True
... ... ... ... ... ... ...
4264 True False False True False True
4265 False True False True True False
4266 False True True False False True
4267 False True True False True False
4268 True False True False True False

4269 rows × 6 columns

In [170]:
df_category
Out[170]:
education_ Graduate education_ Not Graduate self_employed_ No self_employed_ Yes loan_status_ Approved loan_status_ Rejected
0 True False True False True False
1 False True False True False True
2 True False True False False True
3 True False True False False True
4 False True False True False True
... ... ... ... ... ... ...
4264 True False False True False True
4265 False True False True True False
4266 False True True False False True
4267 False True True False True False
4268 True False True False True False

4269 rows × 6 columns

In [172]:
df_category = df_category.astype(int) ## TRUE FALSE HEPSİNİ SAYIYA ÇEVİRDİK.
In [174]:
df_category
Out[174]:
education_ Graduate education_ Not Graduate self_employed_ No self_employed_ Yes loan_status_ Approved loan_status_ Rejected
0 1 0 1 0 1 0
1 0 1 0 1 0 1
2 1 0 1 0 0 1
3 1 0 1 0 0 1
4 0 1 0 1 0 1
... ... ... ... ... ... ...
4264 1 0 0 1 0 1
4265 0 1 0 1 1 0
4266 0 1 1 0 0 1
4267 0 1 1 0 1 0
4268 1 0 1 0 1 0

4269 rows × 6 columns

In [176]:
ml_df = pd.concat([df_number,df_category],axis=1)
In [178]:
ml_df.rename(columns=lambda x: x.replace('_ ', '_'), inplace=True)
In [180]:
print(ml_df.columns)
ml_df.loan_status_Approved
Index(['no_of_dependents', 'income_annum', 'loan_amount', 'loan_term',
       'cibil_score', 'residential_assets_value', 'commercial_assets_value',
       'luxury_assets_value', 'bank_asset_value', 'education_Graduate',
       'education_Not Graduate', 'self_employed_No', 'self_employed_Yes',
       'loan_status_Approved', 'loan_status_Rejected'],
      dtype='object')
Out[180]:
0       1
1       0
2       0
3       0
4       0
       ..
4264    0
4265    1
4266    0
4267    1
4268    1
Name: loan_status_Approved, Length: 4269, dtype: int32

Makine Öğrenmesi¶

In [186]:
import numpy as np
import pandas as pd 
import statsmodels.api as sm
import statsmodels.formula.api as smf
import seaborn as sns
from sklearn.preprocessing import scale 
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.metrics import roc_auc_score,roc_curve
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier


from sklearn.preprocessing import StandardScaler  
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from skompiler import skompile



from warnings import filterwarnings
filterwarnings('ignore')

Lojistik Regresyon¶

Model¶
In [ ]:
X = ml_df.drop('loan_status_Approved', axis=1)  # Bağımsız değişkenler
y = ml_df['loan_status_Approved']  # Bağımlı değişken
In [509]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
In [398]:
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
Out[398]:
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression()
In [400]:
#scikit-learn
loj = LogisticRegression(solver = "liblinear")
loj_model = loj.fit(X,y)
loj_model
Out[400]:
LogisticRegression(solver='liblinear')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression(solver='liblinear')
In [402]:
loj_model.intercept_
Out[402]:
array([9.94034034e-10])
In [404]:
loj_model.coef_
Out[404]:
array([[ 2.04994562e-09, -9.99506665e-08,  5.05309185e-08,
         8.28344107e-10,  2.65294943e-06, -4.04025410e-09,
         2.13252137e-08, -3.17712877e-09,  1.80822711e-08,
         5.32102476e-10,  4.61931559e-10,  4.81703437e-10,
         5.12330597e-10, -7.07921933e-09]])

Tahmin & Model Tuning

In [407]:
y_pred = log_reg.predict(X_test)
print(classification_report(y_test, y_pred))
              precision    recall  f1-score   support

           0       0.41      0.03      0.05       471
           1       0.63      0.98      0.77       810

    accuracy                           0.63      1281
   macro avg       0.52      0.50      0.41      1281
weighted avg       0.55      0.63      0.50      1281

Bu rapora göre, modelin "1" sınıfını tahmin etme yeteneği oldukça yüksek görünüyor (hassasiyet ve geri çağrı yüksek), ancak "0" sınıfını tahmin etme yeteneği oldukça zayıf (düşük hassasiyet ve geri çağrı). Bu, modelin "1" sınıfını iyi tahmin ettiğini ancak "0" sınıfını çok az tahmin ettiğini gösterir.

modele ilişkin olasılık değerleri

In [411]:
log_reg.predict(X)[0:10]
Out[411]:
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
In [413]:
y[0:10]
Out[413]:
0    1
1    0
2    0
3    0
4    0
5    0
6    1
7    0
8    1
9    0
Name: loan_status_Approved, dtype: int32
In [415]:
#modele ilişkin olasılık değerleri. ilk sutun '0' ikinci sutun '1' sınıfına ait olma olasılık
log_reg.predict_proba(X)[0:10]
Out[415]:
array([[0.33662306, 0.66337694],
       [0.44093498, 0.55906502],
       [0.29769414, 0.70230586],
       [0.33676859, 0.66323141],
       [0.42993152, 0.57006848],
       [0.40332827, 0.59667173],
       [0.29282617, 0.70717383],
       [0.44204698, 0.55795302],
       [0.48961826, 0.51038174],
       [0.4671262 , 0.5328738 ]])

esikleme isleminin el ile yapilmasi

In [418]:
y_probs = log_reg.predict_proba(X)
y_probs = y_probs[:,1]
In [420]:
y_probs[0:10]
Out[420]:
array([0.66337694, 0.55906502, 0.70230586, 0.66323141, 0.57006848,
       0.59667173, 0.70717383, 0.55795302, 0.51038174, 0.5328738 ])
In [422]:
#esikleme isleminin el ile yapilmasi
y_pred = [1 if i > 0.5 else 0 for i in y_probs]
In [424]:
y_pred[0:10]
Out[424]:
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

raporlama

In [427]:
confusion_matrix(y, y_pred)
Out[427]:
array([[  42, 1571],
       [  51, 2605]], dtype=int64)
In [429]:
accuracy_score(y, y_pred)
Out[429]:
0.6200515343171703
In [431]:
print(classification_report(y, y_pred))
              precision    recall  f1-score   support

           0       0.45      0.03      0.05      1613
           1       0.62      0.98      0.76      2656

    accuracy                           0.62      4269
   macro avg       0.54      0.50      0.41      4269
weighted avg       0.56      0.62      0.49      4269

In [433]:
log_reg.predict_proba(X)[:,1][0:5]
Out[433]:
array([0.66337694, 0.55906502, 0.70230586, 0.66323141, 0.57006848])
In [435]:
logit_roc_auc = roc_auc_score(y, log_reg.predict(X))

fpr, tpr, thresholds = roc_curve(y, log_reg.predict_proba(X)[:,1])
plt.figure()
plt.plot(fpr, tpr, label='AUC (area = %0.2f)' % logit_roc_auc)
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Oranı')
plt.ylabel('True Positive Oranı')
plt.title('ROC')
plt.show()
print("AUC=",logit_roc_auc)
No description has been provided for this image
AUC= 0.5034183152324113
In [437]:
#test train ayırma işlemi
#verisetindeki verisetinin azlığından, sınıfların eşit dağılmamasından dolayı
#test setini 30% olarak ayarladık
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size = 0.30, 
                                                    random_state = 42)
In [439]:
loj = LogisticRegression(solver = "liblinear")
loj_model = loj.fit(X_train,y_train)
loj_model
Out[439]:
LogisticRegression(solver='liblinear')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression(solver='liblinear')
In [441]:
accuracy_score(y_test, loj_model.predict(X_test))
Out[441]:
0.6268540202966433
In [443]:
cross_val_score(loj_model, X_test, y_test, cv = 10)
Out[443]:
array([0.62790698, 0.6328125 , 0.65625   , 0.6484375 , 0.609375  ,
       0.6328125 , 0.6328125 , 0.6328125 , 0.71875   , 0.6171875 ])
In [445]:
cross_val_score(loj_model, X_test, y_test, cv = 10).mean()
Out[445]:
0.6409156976744186

Gaussian Naive Bayes¶

In [448]:
X = ml_df.drop('loan_status_Approved', axis=1)  # Bağımsız değişkenler
y = ml_df['loan_status_Approved']  # Bağımlı değişken
In [450]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
In [452]:
nb = GaussianNB()
nb_model = nb.fit(X_train, y_train)
nb_model
Out[452]:
GaussianNB()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GaussianNB()
In [454]:
nb_model.predict(X_test)[0:10]
Out[454]:
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
In [456]:
nb_model.predict_proba(X_test)[0:10]
Out[456]:
array([[0.49531051, 0.50468949],
       [0.37832043, 0.62167957],
       [0.45432182, 0.54567818],
       [0.37032466, 0.62967534],
       [0.31148022, 0.68851978],
       [0.35897057, 0.64102943],
       [0.31247462, 0.68752538],
       [0.4568877 , 0.5431123 ],
       [0.37310464, 0.62689536],
       [0.41564146, 0.58435854]])
In [458]:
y_pred = nb_model.predict(X_test)
In [460]:
accuracy_score(y_test, y_pred)
Out[460]:
0.7681498829039812
In [462]:
cross_val_score(nb_model, X_test, y_test, cv = 10).mean()
Out[462]:
0.7658187984496123

KNN¶

In [465]:
X = ml_df.drop('loan_status_Approved', axis=1)  # Bağımsız değişkenler
y = ml_df['loan_status_Approved']  # Bağımlı değişken
In [467]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
In [469]:
knn = KNeighborsClassifier()
knn_model = knn.fit(X_train, y_train)
knn_model
Out[469]:
KNeighborsClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KNeighborsClassifier()
In [471]:
y_pred = knn_model.predict(X_test)
In [473]:
accuracy_score(y_test, y_pred)
Out[473]:
0.5784543325526932
In [475]:
print(classification_report(y_test, y_pred))
              precision    recall  f1-score   support

           0       0.40      0.30      0.34       471
           1       0.65      0.74      0.69       810

    accuracy                           0.58      1281
   macro avg       0.52      0.52      0.52      1281
weighted avg       0.56      0.58      0.56      1281

In [477]:
knn_params = {"n_neighbors": np.arange(1,50)}
In [479]:
knn = KNeighborsClassifier()
knn_cv = GridSearchCV(knn, knn_params, cv=10)
knn_cv.fit(X_train, y_train)
Out[479]:
GridSearchCV(cv=10, estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])})
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=10, estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])})
KNeighborsClassifier()
KNeighborsClassifier()
In [480]:
print("En iyi skor:" + str(knn_cv.best_score_))
print("En iyi parametreler: " + str(knn_cv.best_params_))
En iyi skor:0.6157942582658077
En iyi parametreler: {'n_neighbors': 49}
In [483]:
knn = KNeighborsClassifier(49)
knn_tuned = knn.fit(X_train, y_train)
In [485]:
knn_tuned.score(X_test, y_test)
Out[485]:
0.6104605776736924
In [487]:
y_pred = knn_tuned.predict(X_test)
In [489]:
accuracy_score(y_test, y_pred)
Out[489]:
0.6104605776736924

SVC¶

In [193]:
X = ml_df.drop('loan_status_Approved', axis=1)  # Bağımsız değişkenler
y = ml_df['loan_status_Approved']  # Bağımlı değişken
In [158]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
In [ ]:
svm_model = SVC.fit(X_train, y_train)
In [ ]:
svc_params = {"C": np.arange(1,10)}

svc = SVC(kernel = "linear")

svc_cv_model = GridSearchCV(svc,svc_params, 
                            cv = 10, 
                            n_jobs = -1, 
                            verbose = 2 )

svc_cv_model.fit(X_train, y_train)
In [ ]:
 

YSA¶

In [189]:
X = ml_df.drop('loan_status_Approved', axis=1)  # Bağımsız değişkenler
y = ml_df['loan_status_Approved']  # Bağımlı değişken
In [195]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
In [197]:
scaler = StandardScaler()
In [199]:
#veri standardizasyonu
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
In [201]:
X_test_scaled[0:5]
Out[201]:
array([[ 1.52932527,  0.1138622 ,  0.49415249,  1.57740038, -1.02744105,
        -0.11063326,  1.22342595,  0.0528932 ,  0.72466991,  0.99266412,
        -0.99266412,  1.0148356 , -1.0148356 ,  1.27140173],
       [-0.25151024,  0.2911526 , -0.1329306 , -0.52470577, -0.01207882,
        -0.40407016,  1.10455922,  0.28186474,  0.53869149,  0.99266412,
        -0.99266412,  1.0148356 , -1.0148356 , -0.78653346],
       [ 0.3421016 ,  1.6031015 ,  0.5161554 ,  0.52634731, -0.86013705,
        -0.48558041,  2.6973734 ,  1.44852927,  0.50769508,  0.99266412,
        -0.99266412,  1.0148356 , -1.0148356 ,  1.27140173],
       [-0.25151024,  0.39752683,  0.90120642, -0.52470577,  0.0225358 ,
         0.45993849,  1.41361272,  0.71800101,  1.31360159,  0.99266412,
        -0.99266412,  1.0148356 , -1.0148356 , -0.78653346],
       [ 0.3421016 ,  0.25569452, -0.12192914,  0.17599628,  0.78982658,
         0.73707335, -0.10788141,  0.02018298,  1.06563036, -1.00739009,
         1.00739009, -0.98538127,  0.98538127, -0.78653346]])
In [203]:
mlpc = MLPClassifier().fit(X_train_scaled, y_train)
In [205]:
#modelden neler ogrenebiliriz? 
dir(mlpc)
Out[205]:
['__abstractmethods__',
 '__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_backprop',
 '_check_feature_names',
 '_check_n_features',
 '_check_solver',
 '_compute_loss_grad',
 '_estimator_type',
 '_fit',
 '_fit_lbfgs',
 '_fit_stochastic',
 '_forward_pass',
 '_forward_pass_fast',
 '_get_param_names',
 '_get_tags',
 '_init_coef',
 '_initialize',
 '_label_binarizer',
 '_loss_grad_lbfgs',
 '_more_tags',
 '_no_improvement_count',
 '_optimizer',
 '_parameter_constraints',
 '_predict',
 '_random_state',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_score',
 '_unpack',
 '_update_no_improvement_count',
 '_validate_data',
 '_validate_input',
 '_validate_params',
 'activation',
 'alpha',
 'batch_size',
 'best_loss_',
 'best_validation_score_',
 'beta_1',
 'beta_2',
 'classes_',
 'coefs_',
 'early_stopping',
 'epsilon',
 'fit',
 'get_params',
 'hidden_layer_sizes',
 'intercepts_',
 'learning_rate',
 'learning_rate_init',
 'loss',
 'loss_',
 'loss_curve_',
 'max_fun',
 'max_iter',
 'momentum',
 'n_features_in_',
 'n_iter_',
 'n_iter_no_change',
 'n_layers_',
 'n_outputs_',
 'nesterovs_momentum',
 'out_activation_',
 'partial_fit',
 'power_t',
 'predict',
 'predict_log_proba',
 'predict_proba',
 'random_state',
 'score',
 'set_params',
 'shuffle',
 'solver',
 't_',
 'tol',
 'validation_fraction',
 'validation_scores_',
 'verbose',
 'warm_start']
In [207]:
y_pred = mlpc.predict(X_test_scaled)
accuracy_score(y_test, y_pred)
Out[207]:
1.0
In [209]:
mlpc
Out[209]:
MLPClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
MLPClassifier()
In [211]:
?mlpc
Type:        MLPClassifier
String form: MLPClassifier()
File:        c:\users\eniac\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py
Docstring:  
Multi-layer Perceptron classifier.

This model optimizes the log-loss function using LBFGS or stochastic
gradient descent.

.. versionadded:: 0.18

Parameters
----------
hidden_layer_sizes : array-like of shape(n_layers - 2,), default=(100,)
    The ith element represents the number of neurons in the ith
    hidden layer.

activation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu'
    Activation function for the hidden layer.

    - 'identity', no-op activation, useful to implement linear bottleneck,
      returns f(x) = x

    - 'logistic', the logistic sigmoid function,
      returns f(x) = 1 / (1 + exp(-x)).

    - 'tanh', the hyperbolic tan function,
      returns f(x) = tanh(x).

    - 'relu', the rectified linear unit function,
      returns f(x) = max(0, x)

solver : {'lbfgs', 'sgd', 'adam'}, default='adam'
    The solver for weight optimization.

    - 'lbfgs' is an optimizer in the family of quasi-Newton methods.

    - 'sgd' refers to stochastic gradient descent.

    - 'adam' refers to a stochastic gradient-based optimizer proposed
      by Kingma, Diederik, and Jimmy Ba

    Note: The default solver 'adam' works pretty well on relatively
    large datasets (with thousands of training samples or more) in terms of
    both training time and validation score.
    For small datasets, however, 'lbfgs' can converge faster and perform
    better.

alpha : float, default=0.0001
    Strength of the L2 regularization term. The L2 regularization term
    is divided by the sample size when added to the loss.

batch_size : int, default='auto'
    Size of minibatches for stochastic optimizers.
    If the solver is 'lbfgs', the classifier will not use minibatch.
    When set to "auto", `batch_size=min(200, n_samples)`.

learning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant'
    Learning rate schedule for weight updates.

    - 'constant' is a constant learning rate given by
      'learning_rate_init'.

    - 'invscaling' gradually decreases the learning rate at each
      time step 't' using an inverse scaling exponent of 'power_t'.
      effective_learning_rate = learning_rate_init / pow(t, power_t)

    - 'adaptive' keeps the learning rate constant to
      'learning_rate_init' as long as training loss keeps decreasing.
      Each time two consecutive epochs fail to decrease training loss by at
      least tol, or fail to increase validation score by at least tol if
      'early_stopping' is on, the current learning rate is divided by 5.

    Only used when ``solver='sgd'``.

learning_rate_init : float, default=0.001
    The initial learning rate used. It controls the step-size
    in updating the weights. Only used when solver='sgd' or 'adam'.

power_t : float, default=0.5
    The exponent for inverse scaling learning rate.
    It is used in updating effective learning rate when the learning_rate
    is set to 'invscaling'. Only used when solver='sgd'.

max_iter : int, default=200
    Maximum number of iterations. The solver iterates until convergence
    (determined by 'tol') or this number of iterations. For stochastic
    solvers ('sgd', 'adam'), note that this determines the number of epochs
    (how many times each data point will be used), not the number of
    gradient steps.

shuffle : bool, default=True
    Whether to shuffle samples in each iteration. Only used when
    solver='sgd' or 'adam'.

random_state : int, RandomState instance, default=None
    Determines random number generation for weights and bias
    initialization, train-test split if early stopping is used, and batch
    sampling when solver='sgd' or 'adam'.
    Pass an int for reproducible results across multiple function calls.
    See :term:`Glossary <random_state>`.

tol : float, default=1e-4
    Tolerance for the optimization. When the loss or score is not improving
    by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,
    unless ``learning_rate`` is set to 'adaptive', convergence is
    considered to be reached and training stops.

verbose : bool, default=False
    Whether to print progress messages to stdout.

warm_start : bool, default=False
    When set to True, reuse the solution of the previous
    call to fit as initialization, otherwise, just erase the
    previous solution. See :term:`the Glossary <warm_start>`.

momentum : float, default=0.9
    Momentum for gradient descent update. Should be between 0 and 1. Only
    used when solver='sgd'.

nesterovs_momentum : bool, default=True
    Whether to use Nesterov's momentum. Only used when solver='sgd' and
    momentum > 0.

early_stopping : bool, default=False
    Whether to use early stopping to terminate training when validation
    score is not improving. If set to true, it will automatically set
    aside 10% of training data as validation and terminate training when
    validation score is not improving by at least tol for
    ``n_iter_no_change`` consecutive epochs. The split is stratified,
    except in a multilabel setting.
    If early stopping is False, then the training stops when the training
    loss does not improve by more than tol for n_iter_no_change consecutive
    passes over the training set.
    Only effective when solver='sgd' or 'adam'.

validation_fraction : float, default=0.1
    The proportion of training data to set aside as validation set for
    early stopping. Must be between 0 and 1.
    Only used if early_stopping is True.

beta_1 : float, default=0.9
    Exponential decay rate for estimates of first moment vector in adam,
    should be in [0, 1). Only used when solver='adam'.

beta_2 : float, default=0.999
    Exponential decay rate for estimates of second moment vector in adam,
    should be in [0, 1). Only used when solver='adam'.

epsilon : float, default=1e-8
    Value for numerical stability in adam. Only used when solver='adam'.

n_iter_no_change : int, default=10
    Maximum number of epochs to not meet ``tol`` improvement.
    Only effective when solver='sgd' or 'adam'.

    .. versionadded:: 0.20

max_fun : int, default=15000
    Only used when solver='lbfgs'. Maximum number of loss function calls.
    The solver iterates until convergence (determined by 'tol'), number
    of iterations reaches max_iter, or this number of loss function calls.
    Note that number of loss function calls will be greater than or equal
    to the number of iterations for the `MLPClassifier`.

    .. versionadded:: 0.22

Attributes
----------
classes_ : ndarray or list of ndarray of shape (n_classes,)
    Class labels for each output.

loss_ : float
    The current loss computed with the loss function.

best_loss_ : float or None
    The minimum loss reached by the solver throughout fitting.
    If `early_stopping=True`, this attribute is set ot `None`. Refer to
    the `best_validation_score_` fitted attribute instead.

loss_curve_ : list of shape (`n_iter_`,)
    The ith element in the list represents the loss at the ith iteration.

validation_scores_ : list of shape (`n_iter_`,) or None
    The score at each iteration on a held-out validation set. The score
    reported is the accuracy score. Only available if `early_stopping=True`,
    otherwise the attribute is set to `None`.

best_validation_score_ : float or None
    The best validation score (i.e. accuracy score) that triggered the
    early stopping. Only available if `early_stopping=True`, otherwise the
    attribute is set to `None`.

t_ : int
    The number of training samples seen by the solver during fitting.

coefs_ : list of shape (n_layers - 1,)
    The ith element in the list represents the weight matrix corresponding
    to layer i.

intercepts_ : list of shape (n_layers - 1,)
    The ith element in the list represents the bias vector corresponding to
    layer i + 1.

n_features_in_ : int
    Number of features seen during :term:`fit`.

    .. versionadded:: 0.24

feature_names_in_ : ndarray of shape (`n_features_in_`,)
    Names of features seen during :term:`fit`. Defined only when `X`
    has feature names that are all strings.

    .. versionadded:: 1.0

n_iter_ : int
    The number of iterations the solver has run.

n_layers_ : int
    Number of layers.

n_outputs_ : int
    Number of outputs.

out_activation_ : str
    Name of the output activation function.

See Also
--------
MLPRegressor : Multi-layer Perceptron regressor.
BernoulliRBM : Bernoulli Restricted Boltzmann Machine (RBM).

Notes
-----
MLPClassifier trains iteratively since at each time step
the partial derivatives of the loss function with respect to the model
parameters are computed to update the parameters.

It can also have a regularization term added to the loss function
that shrinks model parameters to prevent overfitting.

This implementation works with data represented as dense numpy arrays or
sparse scipy arrays of floating point values.

References
----------
Hinton, Geoffrey E. "Connectionist learning procedures."
Artificial intelligence 40.1 (1989): 185-234.

Glorot, Xavier, and Yoshua Bengio.
"Understanding the difficulty of training deep feedforward neural networks."
International Conference on Artificial Intelligence and Statistics. 2010.

:arxiv:`He, Kaiming, et al (2015). "Delving deep into rectifiers:
Surpassing human-level performance on imagenet classification." <1502.01852>`

:arxiv:`Kingma, Diederik, and Jimmy Ba (2014)
"Adam: A method for stochastic optimization." <1412.6980>`

Examples
--------
>>> from sklearn.neural_network import MLPClassifier
>>> from sklearn.datasets import make_classification
>>> from sklearn.model_selection import train_test_split
>>> X, y = make_classification(n_samples=100, random_state=1)
>>> X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,
...                                                     random_state=1)
>>> clf = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)
>>> clf.predict_proba(X_test[:1])
array([[0.038..., 0.961...]])
>>> clf.predict(X_test[:5, :])
array([1, 0, 1, 0, 1])
>>> clf.score(X_test, y_test)
0.8...
In [213]:
mlpc_params = {"alpha": [0.1, 0.01, 0.02, 0.005, 0.0001,0.00001],
              "hidden_layer_sizes": [(10,10,10),
                                     (100,100,100),
                                     (100,100),
                                     (3,5), 
                                     (5, 3)],
              "solver" : ["lbfgs","adam","sgd"],
              "activation": ["relu","logistic"]}
In [215]:
mlpc = MLPClassifier()
mlpc_cv_model = GridSearchCV(mlpc, mlpc_params, 
                         cv = 10, 
                         n_jobs = -1,
                         verbose = 2)

mlpc_cv_model.fit(X_train_scaled, y_train)
Fitting 10 folds for each of 180 candidates, totalling 1800 fits
Out[215]:
GridSearchCV(cv=10, estimator=MLPClassifier(), n_jobs=-1,
             param_grid={'activation': ['relu', 'logistic'],
                         'alpha': [0.1, 0.01, 0.02, 0.005, 0.0001, 1e-05],
                         'hidden_layer_sizes': [(10, 10, 10), (100, 100, 100),
                                                (100, 100), (3, 5), (5, 3)],
                         'solver': ['lbfgs', 'adam', 'sgd']},
             verbose=2)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=10, estimator=MLPClassifier(), n_jobs=-1,
             param_grid={'activation': ['relu', 'logistic'],
                         'alpha': [0.1, 0.01, 0.02, 0.005, 0.0001, 1e-05],
                         'hidden_layer_sizes': [(10, 10, 10), (100, 100, 100),
                                                (100, 100), (3, 5), (5, 3)],
                         'solver': ['lbfgs', 'adam', 'sgd']},
             verbose=2)
MLPClassifier()
MLPClassifier()
In [217]:
 print("En iyi parametreler: " + str(mlpc_cv_model.best_params_))
En iyi parametreler: {'activation': 'relu', 'alpha': 0.1, 'hidden_layer_sizes': (10, 10, 10), 'solver': 'lbfgs'}
In [221]:
mlpc_tuned = MLPClassifier(activation = "relu", 
                           alpha = 0.1, 
                           hidden_layer_sizes = (10, 10, 10),
                          solver = "lbfgs")
In [223]:
mlpc_tuned.fit(X_train_scaled, y_train)
Out[223]:
MLPClassifier(alpha=0.1, hidden_layer_sizes=(10, 10, 10), solver='lbfgs')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
MLPClassifier(alpha=0.1, hidden_layer_sizes=(10, 10, 10), solver='lbfgs')
In [225]:
y_pred = mlpc_tuned.predict(X_test_scaled)
accuracy_score(y_test, y_pred)
Out[225]:
1.0
In [231]:
print(classification_report(y_test, y_pred))
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       471
           1       1.00      1.00      1.00       810

    accuracy                           1.00      1281
   macro avg       1.00      1.00      1.00      1281
weighted avg       1.00      1.00      1.00      1281

Karar Ağaçları¶

In [234]:
X = ml_df.drop('loan_status_Approved', axis=1)  # Bağımsız değişkenler
y = ml_df['loan_status_Approved']  # Bağımlı değişken
In [236]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
In [238]:
cart = DecisionTreeClassifier()
cart_model = cart.fit(X_train, y_train)
In [240]:
cart_model
Out[240]:
DecisionTreeClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier()
In [248]:
import astor
#modelin python koduna donusturulmesi
print(skompile(cart_model.predict).to("python/code"))
(1 if x[13] <= 0.5 else 0)

In [251]:
y_pred = cart_model.predict(X_test)
accuracy_score(y_test, y_pred)
Out[251]:
1.0
In [253]:
cart_model
Out[253]:
DecisionTreeClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier()
In [255]:
?cart_model
Type:        DecisionTreeClassifier
String form: DecisionTreeClassifier()
File:        c:\users\eniac\anaconda3\lib\site-packages\sklearn\tree\_classes.py
Docstring:  
A decision tree classifier.

Read more in the :ref:`User Guide <tree>`.

Parameters
----------
criterion : {"gini", "entropy", "log_loss"}, default="gini"
    The function to measure the quality of a split. Supported criteria are
    "gini" for the Gini impurity and "log_loss" and "entropy" both for the
    Shannon information gain, see :ref:`tree_mathematical_formulation`.

splitter : {"best", "random"}, default="best"
    The strategy used to choose the split at each node. Supported
    strategies are "best" to choose the best split and "random" to choose
    the best random split.

max_depth : int, default=None
    The maximum depth of the tree. If None, then nodes are expanded until
    all leaves are pure or until all leaves contain less than
    min_samples_split samples.

min_samples_split : int or float, default=2
    The minimum number of samples required to split an internal node:

    - If int, then consider `min_samples_split` as the minimum number.
    - If float, then `min_samples_split` is a fraction and
      `ceil(min_samples_split * n_samples)` are the minimum
      number of samples for each split.

    .. versionchanged:: 0.18
       Added float values for fractions.

min_samples_leaf : int or float, default=1
    The minimum number of samples required to be at a leaf node.
    A split point at any depth will only be considered if it leaves at
    least ``min_samples_leaf`` training samples in each of the left and
    right branches.  This may have the effect of smoothing the model,
    especially in regression.

    - If int, then consider `min_samples_leaf` as the minimum number.
    - If float, then `min_samples_leaf` is a fraction and
      `ceil(min_samples_leaf * n_samples)` are the minimum
      number of samples for each node.

    .. versionchanged:: 0.18
       Added float values for fractions.

min_weight_fraction_leaf : float, default=0.0
    The minimum weighted fraction of the sum total of weights (of all
    the input samples) required to be at a leaf node. Samples have
    equal weight when sample_weight is not provided.

max_features : int, float or {"auto", "sqrt", "log2"}, default=None
    The number of features to consider when looking for the best split:

        - If int, then consider `max_features` features at each split.
        - If float, then `max_features` is a fraction and
          `max(1, int(max_features * n_features_in_))` features are considered at
          each split.
        - If "auto", then `max_features=sqrt(n_features)`.
        - If "sqrt", then `max_features=sqrt(n_features)`.
        - If "log2", then `max_features=log2(n_features)`.
        - If None, then `max_features=n_features`.

        .. deprecated:: 1.1
            The `"auto"` option was deprecated in 1.1 and will be removed
            in 1.3.

    Note: the search for a split does not stop until at least one
    valid partition of the node samples is found, even if it requires to
    effectively inspect more than ``max_features`` features.

random_state : int, RandomState instance or None, default=None
    Controls the randomness of the estimator. The features are always
    randomly permuted at each split, even if ``splitter`` is set to
    ``"best"``. When ``max_features < n_features``, the algorithm will
    select ``max_features`` at random at each split before finding the best
    split among them. But the best found split may vary across different
    runs, even if ``max_features=n_features``. That is the case, if the
    improvement of the criterion is identical for several splits and one
    split has to be selected at random. To obtain a deterministic behaviour
    during fitting, ``random_state`` has to be fixed to an integer.
    See :term:`Glossary <random_state>` for details.

max_leaf_nodes : int, default=None
    Grow a tree with ``max_leaf_nodes`` in best-first fashion.
    Best nodes are defined as relative reduction in impurity.
    If None then unlimited number of leaf nodes.

min_impurity_decrease : float, default=0.0
    A node will be split if this split induces a decrease of the impurity
    greater than or equal to this value.

    The weighted impurity decrease equation is the following::

        N_t / N * (impurity - N_t_R / N_t * right_impurity
                            - N_t_L / N_t * left_impurity)

    where ``N`` is the total number of samples, ``N_t`` is the number of
    samples at the current node, ``N_t_L`` is the number of samples in the
    left child, and ``N_t_R`` is the number of samples in the right child.

    ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,
    if ``sample_weight`` is passed.

    .. versionadded:: 0.19

class_weight : dict, list of dict or "balanced", default=None
    Weights associated with classes in the form ``{class_label: weight}``.
    If None, all classes are supposed to have weight one. For
    multi-output problems, a list of dicts can be provided in the same
    order as the columns of y.

    Note that for multioutput (including multilabel) weights should be
    defined for each class of every column in its own dict. For example,
    for four-class multilabel classification weights should be
    [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of
    [{1:1}, {2:5}, {3:1}, {4:1}].

    The "balanced" mode uses the values of y to automatically adjust
    weights inversely proportional to class frequencies in the input data
    as ``n_samples / (n_classes * np.bincount(y))``

    For multi-output, the weights of each column of y will be multiplied.

    Note that these weights will be multiplied with sample_weight (passed
    through the fit method) if sample_weight is specified.

ccp_alpha : non-negative float, default=0.0
    Complexity parameter used for Minimal Cost-Complexity Pruning. The
    subtree with the largest cost complexity that is smaller than
    ``ccp_alpha`` will be chosen. By default, no pruning is performed. See
    :ref:`minimal_cost_complexity_pruning` for details.

    .. versionadded:: 0.22

Attributes
----------
classes_ : ndarray of shape (n_classes,) or list of ndarray
    The classes labels (single output problem),
    or a list of arrays of class labels (multi-output problem).

feature_importances_ : ndarray of shape (n_features,)
    The impurity-based feature importances.
    The higher, the more important the feature.
    The importance of a feature is computed as the (normalized)
    total reduction of the criterion brought by that feature.  It is also
    known as the Gini importance [4]_.

    Warning: impurity-based feature importances can be misleading for
    high cardinality features (many unique values). See
    :func:`sklearn.inspection.permutation_importance` as an alternative.

max_features_ : int
    The inferred value of max_features.

n_classes_ : int or list of int
    The number of classes (for single output problems),
    or a list containing the number of classes for each
    output (for multi-output problems).

n_features_in_ : int
    Number of features seen during :term:`fit`.

    .. versionadded:: 0.24

feature_names_in_ : ndarray of shape (`n_features_in_`,)
    Names of features seen during :term:`fit`. Defined only when `X`
    has feature names that are all strings.

    .. versionadded:: 1.0

n_outputs_ : int
    The number of outputs when ``fit`` is performed.

tree_ : Tree instance
    The underlying Tree object. Please refer to
    ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and
    :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`
    for basic usage of these attributes.

See Also
--------
DecisionTreeRegressor : A decision tree regressor.

Notes
-----
The default values for the parameters controlling the size of the trees
(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and
unpruned trees which can potentially be very large on some data sets. To
reduce memory consumption, the complexity and size of the trees should be
controlled by setting those parameter values.

The :meth:`predict` method operates using the :func:`numpy.argmax`
function on the outputs of :meth:`predict_proba`. This means that in
case the highest predicted probabilities are tied, the classifier will
predict the tied class with the lowest index in :term:`classes_`.

References
----------

.. [1] https://en.wikipedia.org/wiki/Decision_tree_learning

.. [2] L. Breiman, J. Friedman, R. Olshen, and C. Stone, "Classification
       and Regression Trees", Wadsworth, Belmont, CA, 1984.

.. [3] T. Hastie, R. Tibshirani and J. Friedman. "Elements of Statistical
       Learning", Springer, 2009.

.. [4] L. Breiman, and A. Cutler, "Random Forests",
       https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm

Examples
--------
>>> from sklearn.datasets import load_iris
>>> from sklearn.model_selection import cross_val_score
>>> from sklearn.tree import DecisionTreeClassifier
>>> clf = DecisionTreeClassifier(random_state=0)
>>> iris = load_iris()
>>> cross_val_score(clf, iris.data, iris.target, cv=10)
...                             # doctest: +SKIP
...
array([ 1.     ,  0.93...,  0.86...,  0.93...,  0.93...,
        0.93...,  0.93...,  1.     ,  0.93...,  1.      ])
In [257]:
cart_grid = {"max_depth": range(1,10),
            "min_samples_split" : list(range(2,50)) }
In [259]:
cart = tree.DecisionTreeClassifier()
cart_cv = GridSearchCV(cart, cart_grid, cv = 10, n_jobs = -1, verbose = 2)
cart_cv_model = cart_cv.fit(X_train, y_train)
Fitting 10 folds for each of 432 candidates, totalling 4320 fits
In [261]:
print("En iyi parametreler: " + str(cart_cv_model.best_params_))
En iyi parametreler: {'max_depth': 1, 'min_samples_split': 2}
In [263]:
cart = tree.DecisionTreeClassifier(max_depth = 1, min_samples_split = 2)
cart_tuned = cart.fit(X_train, y_train)
In [265]:
y_pred = cart_tuned.predict(X_test)
accuracy_score(y_test, y_pred)
Out[265]:
1.0

Random Forests¶

In [268]:
X = ml_df.drop('loan_status_Approved', axis=1)  # Bağımsız değişkenler
y = ml_df['loan_status_Approved']  # Bağımlı değişken
In [270]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
In [272]:
rf_model = RandomForestClassifier().fit(X_train, y_train)
In [274]:
y_pred = rf_model.predict(X_test)
accuracy_score(y_test, y_pred)
Out[274]:
1.0
In [276]:
rf_params = {"max_depth": [2,5,8,10],
            "max_features": [2,5,8],
            "n_estimators": [10,500,1000],
            "min_samples_split": [2,5,10]}
In [278]:
rf_model = RandomForestClassifier()

rf_cv_model = GridSearchCV(rf_model, 
                           rf_params, 
                           cv = 10, 
                           n_jobs = -1, 
                           verbose = 2) 
In [280]:
rf_cv_model.fit(X_train, y_train)
Fitting 10 folds for each of 108 candidates, totalling 1080 fits
Out[280]:
GridSearchCV(cv=10, estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid={'max_depth': [2, 5, 8, 10], 'max_features': [2, 5, 8],
                         'min_samples_split': [2, 5, 10],
                         'n_estimators': [10, 500, 1000]},
             verbose=2)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=10, estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid={'max_depth': [2, 5, 8, 10], 'max_features': [2, 5, 8],
                         'min_samples_split': [2, 5, 10],
                         'n_estimators': [10, 500, 1000]},
             verbose=2)
RandomForestClassifier()
RandomForestClassifier()
In [283]:
print("En iyi parametreler: " + str(rf_cv_model.best_params_))
En iyi parametreler: {'max_depth': 2, 'max_features': 5, 'min_samples_split': 2, 'n_estimators': 10}
In [285]:
rf_tuned = RandomForestClassifier(max_depth = 2, 
                                  max_features = 5, 
                                  min_samples_split = 2,
                                  n_estimators = 10)

rf_tuned.fit(X_train, y_train)
Out[285]:
RandomForestClassifier(max_depth=2, max_features=5, n_estimators=10)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(max_depth=2, max_features=5, n_estimators=10)
In [287]:
y_pred = rf_tuned.predict(X_test)
accuracy_score(y_test, y_pred)
Out[287]:
0.9976580796252927
In [289]:
Importance = pd.DataFrame({"Importance": rf_tuned.feature_importances_*100},
                         index = X_train.columns)
In [291]:
Importance.sort_values(by = "Importance", 
                       axis = 0, 
                       ascending = True).plot(kind ="barh", color = "r")

plt.xlabel("Değişken Önem Düzeyleri")
Out[291]:
Text(0.5, 0, 'Değişken Önem Düzeyleri')
No description has been provided for this image

Gradient Boosting Machines (GBM)¶

In [294]:
X = ml_df.drop('loan_status_Approved', axis=1)  # Bağımsız değişkenler
y = ml_df['loan_status_Approved']  # Bağımlı değişken
In [296]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
In [298]:
gbm_model = GradientBoostingClassifier().fit(X_train, y_train)
In [300]:
y_pred = gbm_model.predict(X_test)
accuracy_score(y_test, y_pred)
Out[300]:
1.0
In [ ]:
?gbm_model
In [302]:
gbm_params = {"learning_rate" : [0.001, 0.01, 0.1, 0.05],
             "n_estimators": [100,500,100],
             "max_depth": [3,5,10],
             "min_samples_split": [2,5,10]}
In [304]:
gbm = GradientBoostingClassifier()

gbm_cv = GridSearchCV(gbm, gbm_params, cv = 10, n_jobs = -1, verbose = 2)
In [306]:
gbm_cv.fit(X_train, y_train)
Fitting 10 folds for each of 108 candidates, totalling 1080 fits
Out[306]:
GridSearchCV(cv=10, estimator=GradientBoostingClassifier(), n_jobs=-1,
             param_grid={'learning_rate': [0.001, 0.01, 0.1, 0.05],
                         'max_depth': [3, 5, 10],
                         'min_samples_split': [2, 5, 10],
                         'n_estimators': [100, 500, 100]},
             verbose=2)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=10, estimator=GradientBoostingClassifier(), n_jobs=-1,
             param_grid={'learning_rate': [0.001, 0.01, 0.1, 0.05],
                         'max_depth': [3, 5, 10],
                         'min_samples_split': [2, 5, 10],
                         'n_estimators': [100, 500, 100]},
             verbose=2)
GradientBoostingClassifier()
GradientBoostingClassifier()
In [313]:
print("En iyi parametreler: " + str(gbm_cv.best_params_))
En iyi parametreler: {'learning_rate': 0.001, 'max_depth': 3, 'min_samples_split': 2, 'n_estimators': 500}
In [315]:
gbm = GradientBoostingClassifier(learning_rate = 0.001, 
                                 max_depth = 3,
                                min_samples_split = 2,
                                n_estimators = 500)
In [317]:
gbm_tuned =  gbm.fit(X_train,y_train)
In [318]:
y_pred = gbm_tuned.predict(X_test)
accuracy_score(y_test, y_pred)
Out[318]:
1.0

XGBoost (eXtreme Gradient Boosting)¶

In [321]:
X = ml_df.drop('loan_status_Approved', axis=1)  # Bağımsız değişkenler
y = ml_df['loan_status_Approved']  # Bağımlı değişken
In [323]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
In [325]:
xgb_model = XGBClassifier().fit(X_train, y_train)
In [327]:
xgb_model
Out[327]:
XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...)
In [329]:
y_pred = xgb_model.predict(X_test)
accuracy_score(y_test, y_pred)
Out[329]:
1.0
In [331]:
xgb_params = {
        'n_estimators': [100, 500, 1000, 2000],
        'subsample': [0.6, 0.8, 1.0],
        'max_depth': [3, 4, 5,6],
        'learning_rate': [0.1,0.01,0.02,0.05],
        "min_samples_split": [2,5,10]}
In [333]:
xgb = XGBClassifier()

xgb_cv_model = GridSearchCV(xgb, xgb_params, cv = 10, n_jobs = -1, verbose = 2)
In [335]:
xgb_cv_model.fit(X_train, y_train)
Fitting 10 folds for each of 576 candidates, totalling 5760 fits
Out[335]:
GridSearchCV(cv=10,
             estimator=XGBClassifier(base_score=None, booster=None,
                                     callbacks=None, colsample_bylevel=None,
                                     colsample_bynode=None,
                                     colsample_bytree=None, device=None,
                                     early_stopping_rounds=None,
                                     enable_categorical=False, eval_metric=None,
                                     feature_types=None, gamma=None,
                                     grow_policy=None, importance_type=None,
                                     interaction_constraints=None,
                                     learning_rate=None...
                                     max_leaves=None, min_child_weight=None,
                                     missing=nan, monotone_constraints=None,
                                     multi_strategy=None, n_estimators=None,
                                     n_jobs=None, num_parallel_tree=None,
                                     random_state=None, ...),
             n_jobs=-1,
             param_grid={'learning_rate': [0.1, 0.01, 0.02, 0.05],
                         'max_depth': [3, 4, 5, 6],
                         'min_samples_split': [2, 5, 10],
                         'n_estimators': [100, 500, 1000, 2000],
                         'subsample': [0.6, 0.8, 1.0]},
             verbose=2)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=10,
             estimator=XGBClassifier(base_score=None, booster=None,
                                     callbacks=None, colsample_bylevel=None,
                                     colsample_bynode=None,
                                     colsample_bytree=None, device=None,
                                     early_stopping_rounds=None,
                                     enable_categorical=False, eval_metric=None,
                                     feature_types=None, gamma=None,
                                     grow_policy=None, importance_type=None,
                                     interaction_constraints=None,
                                     learning_rate=None...
                                     max_leaves=None, min_child_weight=None,
                                     missing=nan, monotone_constraints=None,
                                     multi_strategy=None, n_estimators=None,
                                     n_jobs=None, num_parallel_tree=None,
                                     random_state=None, ...),
             n_jobs=-1,
             param_grid={'learning_rate': [0.1, 0.01, 0.02, 0.05],
                         'max_depth': [3, 4, 5, 6],
                         'min_samples_split': [2, 5, 10],
                         'n_estimators': [100, 500, 1000, 2000],
                         'subsample': [0.6, 0.8, 1.0]},
             verbose=2)
XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...)
XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...)
In [341]:
xgb_cv_model.best_params_
Out[341]:
{'learning_rate': 0.1,
 'max_depth': 3,
 'min_samples_split': 2,
 'n_estimators': 100,
 'subsample': 0.6}
In [343]:
xgb = XGBClassifier(learning_rate = 0.1, 
                    max_depth = 3,
                    min_samples_split = 2,
                    n_estimators = 100,
                    subsample = 0.6)
In [345]:
xgb_tuned =  xgb.fit(X_train,y_train)
In [347]:
y_pred = xgb_tuned.predict(X_test)
accuracy_score(y_test, y_pred)
Out[347]:
1.0

Light GBM¶

In [350]:
X = ml_df.drop('loan_status_Approved', axis=1)  # Bağımsız değişkenler
y = ml_df['loan_status_Approved']  # Bağımlı değişken
In [352]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
In [354]:
lgbm_model = LGBMClassifier().fit(X_train, y_train)
[LightGBM] [Warning] Found whitespace in feature_names, replace with underlines
[LightGBM] [Info] Number of positive: 1846, number of negative: 1142
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000108 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1412
[LightGBM] [Info] Number of data points in the train set: 2988, number of used features: 14
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.617805 -> initscore=0.480240
[LightGBM] [Info] Start training from score 0.480240
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
In [356]:
y_pred = lgbm_model.predict(X_test)
accuracy_score(y_test, y_pred)
Out[356]:
1.0
In [358]:
lgbm_params = {
        'n_estimators': [100, 500, 1000, 2000],
        'subsample': [0.6, 0.8, 1.0],
        'max_depth': [3, 4, 5,6],
        'learning_rate': [0.1,0.01,0.02,0.05],
        "min_child_samples": [5,10,20]}
In [360]:
lgbm = LGBMClassifier()

lgbm_cv_model = GridSearchCV(lgbm, lgbm_params, 
                             cv = 10, 
                             n_jobs = -1, 
                             verbose = 2)
In [362]:
lgbm_cv_model.fit(X_train, y_train)
Fitting 10 folds for each of 576 candidates, totalling 5760 fits
[LightGBM] [Warning] Found whitespace in feature_names, replace with underlines
[LightGBM] [Info] Number of positive: 1846, number of negative: 1142
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000206 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1412
[LightGBM] [Info] Number of data points in the train set: 2988, number of used features: 14
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.617805 -> initscore=0.480240
[LightGBM] [Info] Start training from score 0.480240
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
Out[362]:
GridSearchCV(cv=10, estimator=LGBMClassifier(), n_jobs=-1,
             param_grid={'learning_rate': [0.1, 0.01, 0.02, 0.05],
                         'max_depth': [3, 4, 5, 6],
                         'min_child_samples': [5, 10, 20],
                         'n_estimators': [100, 500, 1000, 2000],
                         'subsample': [0.6, 0.8, 1.0]},
             verbose=2)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=10, estimator=LGBMClassifier(), n_jobs=-1,
             param_grid={'learning_rate': [0.1, 0.01, 0.02, 0.05],
                         'max_depth': [3, 4, 5, 6],
                         'min_child_samples': [5, 10, 20],
                         'n_estimators': [100, 500, 1000, 2000],
                         'subsample': [0.6, 0.8, 1.0]},
             verbose=2)
LGBMClassifier()
LGBMClassifier()
In [364]:
lgbm_cv_model.best_params_
Out[364]:
{'learning_rate': 0.1,
 'max_depth': 3,
 'min_child_samples': 5,
 'n_estimators': 100,
 'subsample': 0.6}
In [368]:
lgbm = LGBMClassifier(learning_rate = 0.01, 
                       max_depth = 3,
                       subsample = 0.6,
                       n_estimators = 100,
                       min_child_samples = 5)
In [370]:
lgbm_tuned = lgbm.fit(X_train,y_train)
[LightGBM] [Warning] Found whitespace in feature_names, replace with underlines
[LightGBM] [Info] Number of positive: 1846, number of negative: 1142
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000067 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1412
[LightGBM] [Info] Number of data points in the train set: 2988, number of used features: 14
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.617805 -> initscore=0.480240
[LightGBM] [Info] Start training from score 0.480240
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
In [372]:
y_pred = lgbm_tuned.predict(X_test)
accuracy_score(y_test, y_pred)
Out[372]:
1.0

CatBoost¶

In [375]:
X = ml_df.drop('loan_status_Approved', axis=1)  # Bağımsız değişkenler
y = ml_df['loan_status_Approved']  # Bağımlı değişken
In [377]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
In [379]:
cat_model = CatBoostClassifier().fit(X_train, y_train)
Learning rate set to 0.016441
0:	learn: 0.6464533	total: 140ms	remaining: 2m 19s
1:	learn: 0.6023797	total: 142ms	remaining: 1m 10s
2:	learn: 0.5566657	total: 144ms	remaining: 47.7s
3:	learn: 0.5171310	total: 146ms	remaining: 36.2s
4:	learn: 0.4850220	total: 147ms	remaining: 29.3s
5:	learn: 0.4516708	total: 149ms	remaining: 24.8s
6:	learn: 0.4181626	total: 151ms	remaining: 21.5s
7:	learn: 0.3901921	total: 153ms	remaining: 19s
8:	learn: 0.3623896	total: 155ms	remaining: 17.1s
9:	learn: 0.3346459	total: 157ms	remaining: 15.6s
10:	learn: 0.3106111	total: 159ms	remaining: 14.3s
11:	learn: 0.2914515	total: 161ms	remaining: 13.2s
12:	learn: 0.2737354	total: 163ms	remaining: 12.4s
13:	learn: 0.2545953	total: 165ms	remaining: 11.6s
14:	learn: 0.2392659	total: 166ms	remaining: 10.9s
15:	learn: 0.2239485	total: 168ms	remaining: 10.3s
16:	learn: 0.2080996	total: 170ms	remaining: 9.84s
17:	learn: 0.1954131	total: 172ms	remaining: 9.39s
18:	learn: 0.1831998	total: 174ms	remaining: 8.98s
19:	learn: 0.1714466	total: 176ms	remaining: 8.61s
20:	learn: 0.1575300	total: 177ms	remaining: 8.25s
21:	learn: 0.1482057	total: 179ms	remaining: 7.95s
22:	learn: 0.1389805	total: 181ms	remaining: 7.68s
23:	learn: 0.1296014	total: 182ms	remaining: 7.41s
24:	learn: 0.1210415	total: 184ms	remaining: 7.18s
25:	learn: 0.1141888	total: 186ms	remaining: 6.96s
26:	learn: 0.1068103	total: 188ms	remaining: 6.76s
27:	learn: 0.1008792	total: 189ms	remaining: 6.57s
28:	learn: 0.0950560	total: 191ms	remaining: 6.4s
29:	learn: 0.0889986	total: 193ms	remaining: 6.25s
30:	learn: 0.0842616	total: 195ms	remaining: 6.09s
31:	learn: 0.0792629	total: 197ms	remaining: 5.97s
32:	learn: 0.0747382	total: 199ms	remaining: 5.84s
33:	learn: 0.0712969	total: 201ms	remaining: 5.72s
34:	learn: 0.0669940	total: 203ms	remaining: 5.6s
35:	learn: 0.0628704	total: 205ms	remaining: 5.49s
36:	learn: 0.0595447	total: 207ms	remaining: 5.39s
37:	learn: 0.0564644	total: 209ms	remaining: 5.29s
38:	learn: 0.0538554	total: 211ms	remaining: 5.2s
39:	learn: 0.0513948	total: 213ms	remaining: 5.11s
40:	learn: 0.0490338	total: 215ms	remaining: 5.02s
41:	learn: 0.0466817	total: 217ms	remaining: 4.94s
42:	learn: 0.0447170	total: 218ms	remaining: 4.86s
43:	learn: 0.0426239	total: 220ms	remaining: 4.79s
44:	learn: 0.0402544	total: 222ms	remaining: 4.72s
45:	learn: 0.0380981	total: 224ms	remaining: 4.65s
46:	learn: 0.0361809	total: 226ms	remaining: 4.58s
47:	learn: 0.0342099	total: 228ms	remaining: 4.52s
48:	learn: 0.0324962	total: 230ms	remaining: 4.46s
49:	learn: 0.0310525	total: 231ms	remaining: 4.39s
50:	learn: 0.0297677	total: 233ms	remaining: 4.34s
51:	learn: 0.0283897	total: 235ms	remaining: 4.28s
52:	learn: 0.0273426	total: 237ms	remaining: 4.23s
53:	learn: 0.0261094	total: 239ms	remaining: 4.18s
54:	learn: 0.0249256	total: 241ms	remaining: 4.13s
55:	learn: 0.0240106	total: 242ms	remaining: 4.09s
56:	learn: 0.0228148	total: 244ms	remaining: 4.04s
57:	learn: 0.0218644	total: 246ms	remaining: 4s
58:	learn: 0.0210465	total: 248ms	remaining: 3.95s
59:	learn: 0.0203860	total: 250ms	remaining: 3.91s
60:	learn: 0.0190205	total: 251ms	remaining: 3.86s
61:	learn: 0.0181295	total: 252ms	remaining: 3.82s
62:	learn: 0.0174370	total: 254ms	remaining: 3.78s
63:	learn: 0.0168389	total: 256ms	remaining: 3.74s
64:	learn: 0.0161580	total: 258ms	remaining: 3.71s
65:	learn: 0.0153754	total: 259ms	remaining: 3.67s
66:	learn: 0.0147726	total: 261ms	remaining: 3.64s
67:	learn: 0.0141584	total: 263ms	remaining: 3.6s
68:	learn: 0.0136554	total: 265ms	remaining: 3.57s
69:	learn: 0.0132593	total: 267ms	remaining: 3.54s
70:	learn: 0.0128097	total: 269ms	remaining: 3.51s
71:	learn: 0.0120275	total: 270ms	remaining: 3.47s
72:	learn: 0.0116394	total: 271ms	remaining: 3.45s
73:	learn: 0.0113393	total: 273ms	remaining: 3.42s
74:	learn: 0.0110250	total: 275ms	remaining: 3.39s
75:	learn: 0.0107296	total: 277ms	remaining: 3.36s
76:	learn: 0.0103957	total: 279ms	remaining: 3.34s
77:	learn: 0.0100990	total: 280ms	remaining: 3.31s
78:	learn: 0.0097881	total: 282ms	remaining: 3.29s
79:	learn: 0.0095039	total: 284ms	remaining: 3.27s
80:	learn: 0.0092724	total: 286ms	remaining: 3.24s
81:	learn: 0.0090495	total: 288ms	remaining: 3.22s
82:	learn: 0.0088212	total: 290ms	remaining: 3.2s
83:	learn: 0.0086016	total: 291ms	remaining: 3.18s
84:	learn: 0.0083273	total: 293ms	remaining: 3.16s
85:	learn: 0.0081304	total: 295ms	remaining: 3.14s
86:	learn: 0.0079240	total: 297ms	remaining: 3.12s
87:	learn: 0.0077645	total: 299ms	remaining: 3.1s
88:	learn: 0.0075987	total: 301ms	remaining: 3.08s
89:	learn: 0.0074203	total: 303ms	remaining: 3.06s
90:	learn: 0.0072050	total: 304ms	remaining: 3.04s
91:	learn: 0.0070380	total: 306ms	remaining: 3.02s
92:	learn: 0.0068800	total: 308ms	remaining: 3s
93:	learn: 0.0066866	total: 310ms	remaining: 2.99s
94:	learn: 0.0065262	total: 312ms	remaining: 2.97s
95:	learn: 0.0063769	total: 314ms	remaining: 2.95s
96:	learn: 0.0062348	total: 315ms	remaining: 2.94s
97:	learn: 0.0060803	total: 317ms	remaining: 2.92s
98:	learn: 0.0059428	total: 319ms	remaining: 2.9s
99:	learn: 0.0058191	total: 321ms	remaining: 2.89s
100:	learn: 0.0056962	total: 323ms	remaining: 2.87s
101:	learn: 0.0055848	total: 325ms	remaining: 2.86s
102:	learn: 0.0054635	total: 326ms	remaining: 2.84s
103:	learn: 0.0053628	total: 328ms	remaining: 2.83s
104:	learn: 0.0052481	total: 330ms	remaining: 2.81s
105:	learn: 0.0051218	total: 332ms	remaining: 2.8s
106:	learn: 0.0049842	total: 334ms	remaining: 2.79s
107:	learn: 0.0048950	total: 336ms	remaining: 2.77s
108:	learn: 0.0048036	total: 338ms	remaining: 2.76s
109:	learn: 0.0047307	total: 339ms	remaining: 2.75s
110:	learn: 0.0046274	total: 341ms	remaining: 2.73s
111:	learn: 0.0045438	total: 343ms	remaining: 2.72s
112:	learn: 0.0044424	total: 345ms	remaining: 2.71s
113:	learn: 0.0043532	total: 347ms	remaining: 2.69s
114:	learn: 0.0042806	total: 349ms	remaining: 2.68s
115:	learn: 0.0042128	total: 350ms	remaining: 2.67s
116:	learn: 0.0041446	total: 352ms	remaining: 2.66s
117:	learn: 0.0040765	total: 354ms	remaining: 2.65s
118:	learn: 0.0040027	total: 356ms	remaining: 2.63s
119:	learn: 0.0039109	total: 358ms	remaining: 2.62s
120:	learn: 0.0038422	total: 360ms	remaining: 2.61s
121:	learn: 0.0037853	total: 362ms	remaining: 2.6s
122:	learn: 0.0037125	total: 364ms	remaining: 2.59s
123:	learn: 0.0036305	total: 365ms	remaining: 2.58s
124:	learn: 0.0035739	total: 367ms	remaining: 2.57s
125:	learn: 0.0034915	total: 369ms	remaining: 2.56s
126:	learn: 0.0034207	total: 371ms	remaining: 2.55s
127:	learn: 0.0033473	total: 372ms	remaining: 2.54s
128:	learn: 0.0033113	total: 374ms	remaining: 2.52s
129:	learn: 0.0032567	total: 376ms	remaining: 2.52s
130:	learn: 0.0032004	total: 378ms	remaining: 2.5s
131:	learn: 0.0031405	total: 379ms	remaining: 2.5s
132:	learn: 0.0030755	total: 381ms	remaining: 2.48s
133:	learn: 0.0030243	total: 383ms	remaining: 2.48s
134:	learn: 0.0029628	total: 385ms	remaining: 2.47s
135:	learn: 0.0029228	total: 387ms	remaining: 2.46s
136:	learn: 0.0028774	total: 389ms	remaining: 2.45s
137:	learn: 0.0028360	total: 391ms	remaining: 2.44s
138:	learn: 0.0027906	total: 392ms	remaining: 2.43s
139:	learn: 0.0027589	total: 394ms	remaining: 2.42s
140:	learn: 0.0027129	total: 396ms	remaining: 2.41s
141:	learn: 0.0026700	total: 398ms	remaining: 2.4s
142:	learn: 0.0026261	total: 400ms	remaining: 2.39s
143:	learn: 0.0025782	total: 402ms	remaining: 2.39s
144:	learn: 0.0025366	total: 404ms	remaining: 2.38s
145:	learn: 0.0024990	total: 406ms	remaining: 2.37s
146:	learn: 0.0024730	total: 408ms	remaining: 2.37s
147:	learn: 0.0024353	total: 410ms	remaining: 2.36s
148:	learn: 0.0024029	total: 411ms	remaining: 2.35s
149:	learn: 0.0023637	total: 413ms	remaining: 2.34s
150:	learn: 0.0023300	total: 416ms	remaining: 2.34s
151:	learn: 0.0023107	total: 418ms	remaining: 2.33s
152:	learn: 0.0022769	total: 419ms	remaining: 2.32s
153:	learn: 0.0022531	total: 422ms	remaining: 2.31s
154:	learn: 0.0022287	total: 424ms	remaining: 2.31s
155:	learn: 0.0021999	total: 426ms	remaining: 2.3s
156:	learn: 0.0021744	total: 428ms	remaining: 2.29s
157:	learn: 0.0021601	total: 429ms	remaining: 2.29s
158:	learn: 0.0021286	total: 431ms	remaining: 2.28s
159:	learn: 0.0021036	total: 433ms	remaining: 2.27s
160:	learn: 0.0020765	total: 435ms	remaining: 2.27s
161:	learn: 0.0020371	total: 437ms	remaining: 2.26s
162:	learn: 0.0020094	total: 439ms	remaining: 2.25s
163:	learn: 0.0019701	total: 440ms	remaining: 2.24s
164:	learn: 0.0019549	total: 442ms	remaining: 2.24s
165:	learn: 0.0019282	total: 444ms	remaining: 2.23s
166:	learn: 0.0019086	total: 446ms	remaining: 2.22s
167:	learn: 0.0018906	total: 448ms	remaining: 2.22s
168:	learn: 0.0018377	total: 449ms	remaining: 2.21s
169:	learn: 0.0018188	total: 451ms	remaining: 2.2s
170:	learn: 0.0018037	total: 453ms	remaining: 2.19s
171:	learn: 0.0017876	total: 454ms	remaining: 2.19s
172:	learn: 0.0017668	total: 456ms	remaining: 2.18s
173:	learn: 0.0017487	total: 458ms	remaining: 2.17s
174:	learn: 0.0017295	total: 460ms	remaining: 2.17s
175:	learn: 0.0017131	total: 462ms	remaining: 2.16s
176:	learn: 0.0016937	total: 464ms	remaining: 2.16s
177:	learn: 0.0016736	total: 466ms	remaining: 2.15s
178:	learn: 0.0016552	total: 467ms	remaining: 2.14s
179:	learn: 0.0016415	total: 469ms	remaining: 2.14s
180:	learn: 0.0016151	total: 471ms	remaining: 2.13s
181:	learn: 0.0015990	total: 473ms	remaining: 2.13s
182:	learn: 0.0015800	total: 475ms	remaining: 2.12s
183:	learn: 0.0015635	total: 477ms	remaining: 2.11s
184:	learn: 0.0015469	total: 479ms	remaining: 2.11s
185:	learn: 0.0015336	total: 480ms	remaining: 2.1s
186:	learn: 0.0015186	total: 482ms	remaining: 2.1s
187:	learn: 0.0015026	total: 484ms	remaining: 2.09s
188:	learn: 0.0014860	total: 486ms	remaining: 2.08s
189:	learn: 0.0014738	total: 488ms	remaining: 2.08s
190:	learn: 0.0014582	total: 490ms	remaining: 2.07s
191:	learn: 0.0014416	total: 492ms	remaining: 2.07s
192:	learn: 0.0014251	total: 494ms	remaining: 2.06s
193:	learn: 0.0014124	total: 496ms	remaining: 2.06s
194:	learn: 0.0014002	total: 497ms	remaining: 2.05s
195:	learn: 0.0013874	total: 499ms	remaining: 2.05s
196:	learn: 0.0013773	total: 501ms	remaining: 2.04s
197:	learn: 0.0013656	total: 503ms	remaining: 2.04s
198:	learn: 0.0013540	total: 505ms	remaining: 2.03s
199:	learn: 0.0013433	total: 506ms	remaining: 2.02s
200:	learn: 0.0013311	total: 508ms	remaining: 2.02s
201:	learn: 0.0013179	total: 510ms	remaining: 2.01s
202:	learn: 0.0013051	total: 512ms	remaining: 2.01s
203:	learn: 0.0012973	total: 514ms	remaining: 2s
204:	learn: 0.0012872	total: 515ms	remaining: 2s
205:	learn: 0.0012720	total: 517ms	remaining: 1.99s
206:	learn: 0.0012602	total: 519ms	remaining: 1.99s
207:	learn: 0.0012497	total: 521ms	remaining: 1.98s
208:	learn: 0.0012349	total: 523ms	remaining: 1.98s
209:	learn: 0.0012213	total: 525ms	remaining: 1.97s
210:	learn: 0.0012147	total: 526ms	remaining: 1.97s
211:	learn: 0.0012052	total: 528ms	remaining: 1.96s
212:	learn: 0.0011942	total: 530ms	remaining: 1.96s
213:	learn: 0.0011858	total: 532ms	remaining: 1.95s
214:	learn: 0.0011772	total: 534ms	remaining: 1.95s
215:	learn: 0.0011701	total: 536ms	remaining: 1.94s
216:	learn: 0.0011615	total: 538ms	remaining: 1.94s
217:	learn: 0.0011517	total: 540ms	remaining: 1.94s
218:	learn: 0.0011435	total: 541ms	remaining: 1.93s
219:	learn: 0.0011332	total: 543ms	remaining: 1.93s
220:	learn: 0.0011232	total: 545ms	remaining: 1.92s
221:	learn: 0.0011139	total: 547ms	remaining: 1.92s
222:	learn: 0.0011025	total: 549ms	remaining: 1.91s
223:	learn: 0.0010941	total: 551ms	remaining: 1.91s
224:	learn: 0.0010841	total: 552ms	remaining: 1.9s
225:	learn: 0.0010776	total: 554ms	remaining: 1.9s
226:	learn: 0.0010776	total: 556ms	remaining: 1.89s
227:	learn: 0.0010776	total: 557ms	remaining: 1.89s
228:	learn: 0.0010696	total: 559ms	remaining: 1.88s
229:	learn: 0.0010624	total: 561ms	remaining: 1.88s
230:	learn: 0.0010555	total: 563ms	remaining: 1.87s
231:	learn: 0.0010488	total: 565ms	remaining: 1.87s
232:	learn: 0.0010400	total: 566ms	remaining: 1.86s
233:	learn: 0.0010400	total: 568ms	remaining: 1.86s
234:	learn: 0.0010309	total: 570ms	remaining: 1.85s
235:	learn: 0.0010309	total: 571ms	remaining: 1.85s
236:	learn: 0.0010229	total: 573ms	remaining: 1.84s
237:	learn: 0.0010229	total: 575ms	remaining: 1.84s
238:	learn: 0.0010164	total: 577ms	remaining: 1.84s
239:	learn: 0.0010087	total: 579ms	remaining: 1.83s
240:	learn: 0.0010021	total: 581ms	remaining: 1.83s
241:	learn: 0.0010021	total: 582ms	remaining: 1.82s
242:	learn: 0.0009934	total: 584ms	remaining: 1.82s
243:	learn: 0.0009934	total: 586ms	remaining: 1.81s
244:	learn: 0.0009855	total: 587ms	remaining: 1.81s
245:	learn: 0.0009836	total: 589ms	remaining: 1.8s
246:	learn: 0.0009836	total: 591ms	remaining: 1.8s
247:	learn: 0.0009836	total: 592ms	remaining: 1.8s
248:	learn: 0.0009780	total: 594ms	remaining: 1.79s
249:	learn: 0.0009710	total: 596ms	remaining: 1.79s
250:	learn: 0.0009622	total: 598ms	remaining: 1.78s
251:	learn: 0.0009622	total: 600ms	remaining: 1.78s
252:	learn: 0.0009550	total: 602ms	remaining: 1.78s
253:	learn: 0.0009492	total: 603ms	remaining: 1.77s
254:	learn: 0.0009492	total: 605ms	remaining: 1.77s
255:	learn: 0.0009492	total: 607ms	remaining: 1.76s
256:	learn: 0.0009492	total: 608ms	remaining: 1.76s
257:	learn: 0.0009492	total: 610ms	remaining: 1.75s
258:	learn: 0.0009424	total: 612ms	remaining: 1.75s
259:	learn: 0.0009363	total: 614ms	remaining: 1.75s
260:	learn: 0.0009296	total: 616ms	remaining: 1.74s
261:	learn: 0.0009228	total: 618ms	remaining: 1.74s
262:	learn: 0.0009228	total: 619ms	remaining: 1.74s
263:	learn: 0.0009155	total: 621ms	remaining: 1.73s
264:	learn: 0.0009086	total: 623ms	remaining: 1.73s
265:	learn: 0.0009086	total: 625ms	remaining: 1.72s
266:	learn: 0.0009086	total: 626ms	remaining: 1.72s
267:	learn: 0.0009086	total: 628ms	remaining: 1.71s
268:	learn: 0.0009086	total: 629ms	remaining: 1.71s
269:	learn: 0.0009013	total: 631ms	remaining: 1.71s
270:	learn: 0.0008944	total: 633ms	remaining: 1.7s
271:	learn: 0.0008944	total: 635ms	remaining: 1.7s
272:	learn: 0.0008944	total: 636ms	remaining: 1.69s
273:	learn: 0.0008944	total: 638ms	remaining: 1.69s
274:	learn: 0.0008944	total: 639ms	remaining: 1.69s
275:	learn: 0.0008944	total: 641ms	remaining: 1.68s
276:	learn: 0.0008869	total: 643ms	remaining: 1.68s
277:	learn: 0.0008869	total: 644ms	remaining: 1.67s
278:	learn: 0.0008869	total: 646ms	remaining: 1.67s
279:	learn: 0.0008869	total: 647ms	remaining: 1.66s
280:	learn: 0.0008869	total: 649ms	remaining: 1.66s
281:	learn: 0.0008795	total: 651ms	remaining: 1.66s
282:	learn: 0.0008719	total: 653ms	remaining: 1.65s
283:	learn: 0.0008719	total: 654ms	remaining: 1.65s
284:	learn: 0.0008719	total: 656ms	remaining: 1.65s
285:	learn: 0.0008719	total: 658ms	remaining: 1.64s
286:	learn: 0.0008719	total: 659ms	remaining: 1.64s
287:	learn: 0.0008719	total: 661ms	remaining: 1.63s
288:	learn: 0.0008641	total: 662ms	remaining: 1.63s
289:	learn: 0.0008640	total: 664ms	remaining: 1.63s
290:	learn: 0.0008557	total: 665ms	remaining: 1.62s
291:	learn: 0.0008557	total: 667ms	remaining: 1.62s
292:	learn: 0.0008557	total: 668ms	remaining: 1.61s
293:	learn: 0.0008558	total: 670ms	remaining: 1.61s
294:	learn: 0.0008505	total: 672ms	remaining: 1.6s
295:	learn: 0.0008504	total: 673ms	remaining: 1.6s
296:	learn: 0.0008504	total: 675ms	remaining: 1.6s
297:	learn: 0.0008504	total: 676ms	remaining: 1.59s
298:	learn: 0.0008504	total: 678ms	remaining: 1.59s
299:	learn: 0.0008504	total: 680ms	remaining: 1.58s
300:	learn: 0.0008448	total: 681ms	remaining: 1.58s
301:	learn: 0.0008448	total: 683ms	remaining: 1.58s
302:	learn: 0.0008448	total: 685ms	remaining: 1.57s
303:	learn: 0.0008448	total: 686ms	remaining: 1.57s
304:	learn: 0.0008448	total: 688ms	remaining: 1.57s
305:	learn: 0.0008448	total: 689ms	remaining: 1.56s
306:	learn: 0.0008448	total: 691ms	remaining: 1.56s
307:	learn: 0.0008448	total: 693ms	remaining: 1.56s
308:	learn: 0.0008448	total: 694ms	remaining: 1.55s
309:	learn: 0.0008448	total: 696ms	remaining: 1.55s
310:	learn: 0.0008448	total: 697ms	remaining: 1.54s
311:	learn: 0.0008447	total: 699ms	remaining: 1.54s
312:	learn: 0.0008448	total: 700ms	remaining: 1.54s
313:	learn: 0.0008447	total: 702ms	remaining: 1.53s
314:	learn: 0.0008447	total: 704ms	remaining: 1.53s
315:	learn: 0.0008447	total: 705ms	remaining: 1.53s
316:	learn: 0.0008447	total: 706ms	remaining: 1.52s
317:	learn: 0.0008447	total: 708ms	remaining: 1.52s
318:	learn: 0.0008447	total: 709ms	remaining: 1.51s
319:	learn: 0.0008447	total: 711ms	remaining: 1.51s
320:	learn: 0.0008447	total: 713ms	remaining: 1.51s
321:	learn: 0.0008447	total: 714ms	remaining: 1.5s
322:	learn: 0.0008447	total: 716ms	remaining: 1.5s
323:	learn: 0.0008447	total: 717ms	remaining: 1.5s
324:	learn: 0.0008447	total: 719ms	remaining: 1.49s
325:	learn: 0.0008447	total: 720ms	remaining: 1.49s
326:	learn: 0.0008447	total: 722ms	remaining: 1.49s
327:	learn: 0.0008447	total: 723ms	remaining: 1.48s
328:	learn: 0.0008447	total: 725ms	remaining: 1.48s
329:	learn: 0.0008447	total: 727ms	remaining: 1.48s
330:	learn: 0.0008447	total: 728ms	remaining: 1.47s
331:	learn: 0.0008447	total: 730ms	remaining: 1.47s
332:	learn: 0.0008447	total: 731ms	remaining: 1.46s
333:	learn: 0.0008447	total: 733ms	remaining: 1.46s
334:	learn: 0.0008447	total: 735ms	remaining: 1.46s
335:	learn: 0.0008447	total: 736ms	remaining: 1.45s
336:	learn: 0.0008447	total: 738ms	remaining: 1.45s
337:	learn: 0.0008447	total: 739ms	remaining: 1.45s
338:	learn: 0.0008447	total: 741ms	remaining: 1.44s
339:	learn: 0.0008447	total: 742ms	remaining: 1.44s
340:	learn: 0.0008447	total: 744ms	remaining: 1.44s
341:	learn: 0.0008446	total: 745ms	remaining: 1.43s
342:	learn: 0.0008447	total: 747ms	remaining: 1.43s
343:	learn: 0.0008447	total: 749ms	remaining: 1.43s
344:	learn: 0.0008395	total: 751ms	remaining: 1.43s
345:	learn: 0.0008327	total: 753ms	remaining: 1.42s
346:	learn: 0.0008327	total: 754ms	remaining: 1.42s
347:	learn: 0.0008327	total: 756ms	remaining: 1.42s
348:	learn: 0.0008327	total: 757ms	remaining: 1.41s
349:	learn: 0.0008327	total: 759ms	remaining: 1.41s
350:	learn: 0.0008327	total: 760ms	remaining: 1.41s
351:	learn: 0.0008327	total: 762ms	remaining: 1.4s
352:	learn: 0.0008327	total: 763ms	remaining: 1.4s
353:	learn: 0.0008327	total: 765ms	remaining: 1.4s
354:	learn: 0.0008326	total: 766ms	remaining: 1.39s
355:	learn: 0.0008326	total: 768ms	remaining: 1.39s
356:	learn: 0.0008326	total: 770ms	remaining: 1.39s
357:	learn: 0.0008326	total: 771ms	remaining: 1.38s
358:	learn: 0.0008326	total: 773ms	remaining: 1.38s
359:	learn: 0.0008326	total: 774ms	remaining: 1.38s
360:	learn: 0.0008260	total: 776ms	remaining: 1.37s
361:	learn: 0.0008260	total: 778ms	remaining: 1.37s
362:	learn: 0.0008260	total: 779ms	remaining: 1.37s
363:	learn: 0.0008260	total: 781ms	remaining: 1.36s
364:	learn: 0.0008260	total: 783ms	remaining: 1.36s
365:	learn: 0.0008260	total: 784ms	remaining: 1.36s
366:	learn: 0.0008260	total: 786ms	remaining: 1.35s
367:	learn: 0.0008260	total: 787ms	remaining: 1.35s
368:	learn: 0.0008260	total: 789ms	remaining: 1.35s
369:	learn: 0.0008260	total: 790ms	remaining: 1.34s
370:	learn: 0.0008260	total: 792ms	remaining: 1.34s
371:	learn: 0.0008260	total: 794ms	remaining: 1.34s
372:	learn: 0.0008260	total: 795ms	remaining: 1.34s
373:	learn: 0.0008260	total: 797ms	remaining: 1.33s
374:	learn: 0.0008260	total: 799ms	remaining: 1.33s
375:	learn: 0.0008260	total: 800ms	remaining: 1.33s
376:	learn: 0.0008259	total: 802ms	remaining: 1.32s
377:	learn: 0.0008204	total: 804ms	remaining: 1.32s
378:	learn: 0.0008204	total: 805ms	remaining: 1.32s
379:	learn: 0.0008204	total: 807ms	remaining: 1.32s
380:	learn: 0.0008204	total: 809ms	remaining: 1.31s
381:	learn: 0.0008204	total: 810ms	remaining: 1.31s
382:	learn: 0.0008204	total: 812ms	remaining: 1.31s
383:	learn: 0.0008203	total: 814ms	remaining: 1.3s
384:	learn: 0.0008204	total: 815ms	remaining: 1.3s
385:	learn: 0.0008203	total: 817ms	remaining: 1.3s
386:	learn: 0.0008204	total: 819ms	remaining: 1.3s
387:	learn: 0.0008203	total: 821ms	remaining: 1.29s
388:	learn: 0.0008203	total: 822ms	remaining: 1.29s
389:	learn: 0.0008203	total: 824ms	remaining: 1.29s
390:	learn: 0.0008137	total: 826ms	remaining: 1.29s
391:	learn: 0.0008136	total: 828ms	remaining: 1.28s
392:	learn: 0.0008137	total: 829ms	remaining: 1.28s
393:	learn: 0.0008137	total: 831ms	remaining: 1.28s
394:	learn: 0.0008136	total: 833ms	remaining: 1.27s
395:	learn: 0.0008136	total: 834ms	remaining: 1.27s
396:	learn: 0.0008136	total: 836ms	remaining: 1.27s
397:	learn: 0.0008136	total: 838ms	remaining: 1.27s
398:	learn: 0.0008136	total: 839ms	remaining: 1.26s
399:	learn: 0.0008136	total: 841ms	remaining: 1.26s
400:	learn: 0.0008136	total: 843ms	remaining: 1.26s
401:	learn: 0.0008136	total: 844ms	remaining: 1.25s
402:	learn: 0.0008136	total: 846ms	remaining: 1.25s
403:	learn: 0.0008136	total: 848ms	remaining: 1.25s
404:	learn: 0.0008136	total: 849ms	remaining: 1.25s
405:	learn: 0.0008136	total: 851ms	remaining: 1.24s
406:	learn: 0.0008136	total: 853ms	remaining: 1.24s
407:	learn: 0.0008136	total: 854ms	remaining: 1.24s
408:	learn: 0.0008136	total: 856ms	remaining: 1.24s
409:	learn: 0.0008136	total: 858ms	remaining: 1.23s
410:	learn: 0.0008136	total: 859ms	remaining: 1.23s
411:	learn: 0.0008136	total: 861ms	remaining: 1.23s
412:	learn: 0.0008136	total: 863ms	remaining: 1.23s
413:	learn: 0.0008136	total: 864ms	remaining: 1.22s
414:	learn: 0.0008136	total: 866ms	remaining: 1.22s
415:	learn: 0.0008136	total: 868ms	remaining: 1.22s
416:	learn: 0.0008136	total: 869ms	remaining: 1.22s
417:	learn: 0.0008136	total: 871ms	remaining: 1.21s
418:	learn: 0.0008070	total: 873ms	remaining: 1.21s
419:	learn: 0.0008070	total: 875ms	remaining: 1.21s
420:	learn: 0.0008070	total: 876ms	remaining: 1.21s
421:	learn: 0.0008070	total: 878ms	remaining: 1.2s
422:	learn: 0.0008069	total: 880ms	remaining: 1.2s
423:	learn: 0.0008070	total: 881ms	remaining: 1.2s
424:	learn: 0.0008069	total: 883ms	remaining: 1.19s
425:	learn: 0.0008069	total: 885ms	remaining: 1.19s
426:	learn: 0.0008047	total: 887ms	remaining: 1.19s
427:	learn: 0.0008047	total: 888ms	remaining: 1.19s
428:	learn: 0.0008047	total: 890ms	remaining: 1.18s
429:	learn: 0.0008047	total: 892ms	remaining: 1.18s
430:	learn: 0.0007970	total: 894ms	remaining: 1.18s
431:	learn: 0.0007970	total: 895ms	remaining: 1.18s
432:	learn: 0.0007970	total: 897ms	remaining: 1.17s
433:	learn: 0.0007970	total: 899ms	remaining: 1.17s
434:	learn: 0.0007970	total: 900ms	remaining: 1.17s
435:	learn: 0.0007970	total: 902ms	remaining: 1.17s
436:	learn: 0.0007970	total: 904ms	remaining: 1.16s
437:	learn: 0.0007970	total: 906ms	remaining: 1.16s
438:	learn: 0.0007970	total: 907ms	remaining: 1.16s
439:	learn: 0.0007970	total: 909ms	remaining: 1.16s
440:	learn: 0.0007970	total: 911ms	remaining: 1.15s
441:	learn: 0.0007970	total: 913ms	remaining: 1.15s
442:	learn: 0.0007970	total: 915ms	remaining: 1.15s
443:	learn: 0.0007970	total: 917ms	remaining: 1.15s
444:	learn: 0.0007970	total: 919ms	remaining: 1.15s
445:	learn: 0.0007970	total: 920ms	remaining: 1.14s
446:	learn: 0.0007970	total: 922ms	remaining: 1.14s
447:	learn: 0.0007970	total: 924ms	remaining: 1.14s
448:	learn: 0.0007970	total: 925ms	remaining: 1.14s
449:	learn: 0.0007970	total: 927ms	remaining: 1.13s
450:	learn: 0.0007970	total: 929ms	remaining: 1.13s
451:	learn: 0.0007970	total: 931ms	remaining: 1.13s
452:	learn: 0.0007970	total: 933ms	remaining: 1.13s
453:	learn: 0.0007970	total: 934ms	remaining: 1.12s
454:	learn: 0.0007970	total: 936ms	remaining: 1.12s
455:	learn: 0.0007970	total: 938ms	remaining: 1.12s
456:	learn: 0.0007970	total: 940ms	remaining: 1.12s
457:	learn: 0.0007970	total: 942ms	remaining: 1.11s
458:	learn: 0.0007970	total: 944ms	remaining: 1.11s
459:	learn: 0.0007970	total: 946ms	remaining: 1.11s
460:	learn: 0.0007970	total: 948ms	remaining: 1.11s
461:	learn: 0.0007970	total: 950ms	remaining: 1.1s
462:	learn: 0.0007970	total: 951ms	remaining: 1.1s
463:	learn: 0.0007970	total: 953ms	remaining: 1.1s
464:	learn: 0.0007970	total: 955ms	remaining: 1.1s
465:	learn: 0.0007970	total: 957ms	remaining: 1.1s
466:	learn: 0.0007908	total: 959ms	remaining: 1.09s
467:	learn: 0.0007907	total: 961ms	remaining: 1.09s
468:	learn: 0.0007907	total: 963ms	remaining: 1.09s
469:	learn: 0.0007907	total: 965ms	remaining: 1.09s
470:	learn: 0.0007907	total: 966ms	remaining: 1.08s
471:	learn: 0.0007907	total: 968ms	remaining: 1.08s
472:	learn: 0.0007907	total: 970ms	remaining: 1.08s
473:	learn: 0.0007907	total: 972ms	remaining: 1.08s
474:	learn: 0.0007907	total: 975ms	remaining: 1.08s
475:	learn: 0.0007907	total: 976ms	remaining: 1.07s
476:	learn: 0.0007907	total: 979ms	remaining: 1.07s
477:	learn: 0.0007907	total: 981ms	remaining: 1.07s
478:	learn: 0.0007907	total: 982ms	remaining: 1.07s
479:	learn: 0.0007907	total: 984ms	remaining: 1.07s
480:	learn: 0.0007907	total: 986ms	remaining: 1.06s
481:	learn: 0.0007907	total: 988ms	remaining: 1.06s
482:	learn: 0.0007907	total: 991ms	remaining: 1.06s
483:	learn: 0.0007907	total: 992ms	remaining: 1.06s
484:	learn: 0.0007907	total: 995ms	remaining: 1.06s
485:	learn: 0.0007907	total: 997ms	remaining: 1.05s
486:	learn: 0.0007907	total: 998ms	remaining: 1.05s
487:	learn: 0.0007907	total: 1s	remaining: 1.05s
488:	learn: 0.0007907	total: 1s	remaining: 1.05s
489:	learn: 0.0007907	total: 1s	remaining: 1.04s
490:	learn: 0.0007907	total: 1s	remaining: 1.04s
491:	learn: 0.0007907	total: 1.01s	remaining: 1.04s
492:	learn: 0.0007907	total: 1.01s	remaining: 1.04s
493:	learn: 0.0007907	total: 1.01s	remaining: 1.04s
494:	learn: 0.0007907	total: 1.01s	remaining: 1.03s
495:	learn: 0.0007907	total: 1.01s	remaining: 1.03s
496:	learn: 0.0007907	total: 1.02s	remaining: 1.03s
497:	learn: 0.0007907	total: 1.02s	remaining: 1.03s
498:	learn: 0.0007907	total: 1.02s	remaining: 1.02s
499:	learn: 0.0007906	total: 1.02s	remaining: 1.02s
500:	learn: 0.0007906	total: 1.02s	remaining: 1.02s
501:	learn: 0.0007906	total: 1.02s	remaining: 1.02s
502:	learn: 0.0007906	total: 1.03s	remaining: 1.01s
503:	learn: 0.0007906	total: 1.03s	remaining: 1.01s
504:	learn: 0.0007906	total: 1.03s	remaining: 1.01s
505:	learn: 0.0007906	total: 1.03s	remaining: 1.01s
506:	learn: 0.0007906	total: 1.03s	remaining: 1s
507:	learn: 0.0007906	total: 1.03s	remaining: 1s
508:	learn: 0.0007906	total: 1.04s	remaining: 1s
509:	learn: 0.0007906	total: 1.04s	remaining: 999ms
510:	learn: 0.0007906	total: 1.04s	remaining: 996ms
511:	learn: 0.0007906	total: 1.04s	remaining: 994ms
512:	learn: 0.0007906	total: 1.04s	remaining: 992ms
513:	learn: 0.0007906	total: 1.05s	remaining: 989ms
514:	learn: 0.0007906	total: 1.05s	remaining: 987ms
515:	learn: 0.0007906	total: 1.05s	remaining: 985ms
516:	learn: 0.0007906	total: 1.05s	remaining: 982ms
517:	learn: 0.0007906	total: 1.05s	remaining: 980ms
518:	learn: 0.0007906	total: 1.05s	remaining: 978ms
519:	learn: 0.0007906	total: 1.06s	remaining: 975ms
520:	learn: 0.0007906	total: 1.06s	remaining: 973ms
521:	learn: 0.0007906	total: 1.06s	remaining: 970ms
522:	learn: 0.0007906	total: 1.06s	remaining: 968ms
523:	learn: 0.0007906	total: 1.06s	remaining: 966ms
524:	learn: 0.0007906	total: 1.06s	remaining: 963ms
525:	learn: 0.0007906	total: 1.07s	remaining: 961ms
526:	learn: 0.0007906	total: 1.07s	remaining: 959ms
527:	learn: 0.0007906	total: 1.07s	remaining: 956ms
528:	learn: 0.0007905	total: 1.07s	remaining: 954ms
529:	learn: 0.0007906	total: 1.07s	remaining: 952ms
530:	learn: 0.0007905	total: 1.07s	remaining: 949ms
531:	learn: 0.0007905	total: 1.08s	remaining: 947ms
532:	learn: 0.0007905	total: 1.08s	remaining: 945ms
533:	learn: 0.0007905	total: 1.08s	remaining: 942ms
534:	learn: 0.0007905	total: 1.08s	remaining: 940ms
535:	learn: 0.0007905	total: 1.08s	remaining: 938ms
536:	learn: 0.0007905	total: 1.08s	remaining: 935ms
537:	learn: 0.0007905	total: 1.09s	remaining: 933ms
538:	learn: 0.0007905	total: 1.09s	remaining: 931ms
539:	learn: 0.0007905	total: 1.09s	remaining: 928ms
540:	learn: 0.0007905	total: 1.09s	remaining: 926ms
541:	learn: 0.0007905	total: 1.09s	remaining: 924ms
542:	learn: 0.0007905	total: 1.09s	remaining: 921ms
543:	learn: 0.0007905	total: 1.1s	remaining: 919ms
544:	learn: 0.0007905	total: 1.1s	remaining: 917ms
545:	learn: 0.0007905	total: 1.1s	remaining: 915ms
546:	learn: 0.0007905	total: 1.1s	remaining: 912ms
547:	learn: 0.0007905	total: 1.1s	remaining: 910ms
548:	learn: 0.0007905	total: 1.1s	remaining: 908ms
549:	learn: 0.0007905	total: 1.11s	remaining: 905ms
550:	learn: 0.0007905	total: 1.11s	remaining: 903ms
551:	learn: 0.0007905	total: 1.11s	remaining: 901ms
552:	learn: 0.0007905	total: 1.11s	remaining: 899ms
553:	learn: 0.0007905	total: 1.11s	remaining: 896ms
554:	learn: 0.0007905	total: 1.11s	remaining: 894ms
555:	learn: 0.0007905	total: 1.12s	remaining: 892ms
556:	learn: 0.0007905	total: 1.12s	remaining: 890ms
557:	learn: 0.0007905	total: 1.12s	remaining: 887ms
558:	learn: 0.0007905	total: 1.12s	remaining: 885ms
559:	learn: 0.0007904	total: 1.12s	remaining: 883ms
560:	learn: 0.0007904	total: 1.13s	remaining: 881ms
561:	learn: 0.0007904	total: 1.13s	remaining: 878ms
562:	learn: 0.0007904	total: 1.13s	remaining: 876ms
563:	learn: 0.0007904	total: 1.13s	remaining: 874ms
564:	learn: 0.0007904	total: 1.13s	remaining: 871ms
565:	learn: 0.0007904	total: 1.13s	remaining: 869ms
566:	learn: 0.0007904	total: 1.14s	remaining: 867ms
567:	learn: 0.0007904	total: 1.14s	remaining: 865ms
568:	learn: 0.0007904	total: 1.14s	remaining: 862ms
569:	learn: 0.0007904	total: 1.14s	remaining: 860ms
570:	learn: 0.0007904	total: 1.14s	remaining: 858ms
571:	learn: 0.0007904	total: 1.14s	remaining: 856ms
572:	learn: 0.0007904	total: 1.15s	remaining: 854ms
573:	learn: 0.0007904	total: 1.15s	remaining: 852ms
574:	learn: 0.0007904	total: 1.15s	remaining: 849ms
575:	learn: 0.0007904	total: 1.15s	remaining: 847ms
576:	learn: 0.0007904	total: 1.15s	remaining: 845ms
577:	learn: 0.0007904	total: 1.15s	remaining: 843ms
578:	learn: 0.0007904	total: 1.16s	remaining: 841ms
579:	learn: 0.0007904	total: 1.16s	remaining: 839ms
580:	learn: 0.0007904	total: 1.16s	remaining: 836ms
581:	learn: 0.0007904	total: 1.16s	remaining: 834ms
582:	learn: 0.0007904	total: 1.16s	remaining: 832ms
583:	learn: 0.0007904	total: 1.16s	remaining: 830ms
584:	learn: 0.0007904	total: 1.17s	remaining: 828ms
585:	learn: 0.0007904	total: 1.17s	remaining: 825ms
586:	learn: 0.0007904	total: 1.17s	remaining: 823ms
587:	learn: 0.0007904	total: 1.17s	remaining: 821ms
588:	learn: 0.0007904	total: 1.17s	remaining: 819ms
589:	learn: 0.0007904	total: 1.18s	remaining: 817ms
590:	learn: 0.0007903	total: 1.18s	remaining: 814ms
591:	learn: 0.0007904	total: 1.18s	remaining: 812ms
592:	learn: 0.0007904	total: 1.18s	remaining: 810ms
593:	learn: 0.0007903	total: 1.18s	remaining: 808ms
594:	learn: 0.0007903	total: 1.18s	remaining: 806ms
595:	learn: 0.0007903	total: 1.19s	remaining: 804ms
596:	learn: 0.0007903	total: 1.19s	remaining: 801ms
597:	learn: 0.0007903	total: 1.19s	remaining: 799ms
598:	learn: 0.0007903	total: 1.19s	remaining: 797ms
599:	learn: 0.0007903	total: 1.19s	remaining: 795ms
600:	learn: 0.0007903	total: 1.19s	remaining: 793ms
601:	learn: 0.0007903	total: 1.2s	remaining: 791ms
602:	learn: 0.0007903	total: 1.2s	remaining: 789ms
603:	learn: 0.0007903	total: 1.2s	remaining: 787ms
604:	learn: 0.0007903	total: 1.2s	remaining: 784ms
605:	learn: 0.0007903	total: 1.2s	remaining: 782ms
606:	learn: 0.0007903	total: 1.2s	remaining: 780ms
607:	learn: 0.0007903	total: 1.21s	remaining: 778ms
608:	learn: 0.0007903	total: 1.21s	remaining: 776ms
609:	learn: 0.0007903	total: 1.21s	remaining: 773ms
610:	learn: 0.0007903	total: 1.21s	remaining: 771ms
611:	learn: 0.0007903	total: 1.21s	remaining: 769ms
612:	learn: 0.0007903	total: 1.21s	remaining: 767ms
613:	learn: 0.0007903	total: 1.22s	remaining: 765ms
614:	learn: 0.0007903	total: 1.22s	remaining: 763ms
615:	learn: 0.0007903	total: 1.22s	remaining: 761ms
616:	learn: 0.0007903	total: 1.22s	remaining: 759ms
617:	learn: 0.0007903	total: 1.22s	remaining: 756ms
618:	learn: 0.0007903	total: 1.23s	remaining: 754ms
619:	learn: 0.0007903	total: 1.23s	remaining: 752ms
620:	learn: 0.0007903	total: 1.23s	remaining: 752ms
621:	learn: 0.0007903	total: 1.24s	remaining: 751ms
622:	learn: 0.0007903	total: 1.24s	remaining: 751ms
623:	learn: 0.0007903	total: 1.24s	remaining: 750ms
624:	learn: 0.0007902	total: 1.25s	remaining: 748ms
625:	learn: 0.0007902	total: 1.25s	remaining: 746ms
626:	learn: 0.0007902	total: 1.25s	remaining: 745ms
627:	learn: 0.0007902	total: 1.25s	remaining: 743ms
628:	learn: 0.0007902	total: 1.26s	remaining: 741ms
629:	learn: 0.0007902	total: 1.26s	remaining: 739ms
630:	learn: 0.0007902	total: 1.26s	remaining: 737ms
631:	learn: 0.0007902	total: 1.26s	remaining: 735ms
632:	learn: 0.0007902	total: 1.26s	remaining: 733ms
633:	learn: 0.0007902	total: 1.27s	remaining: 731ms
634:	learn: 0.0007902	total: 1.27s	remaining: 729ms
635:	learn: 0.0007902	total: 1.27s	remaining: 727ms
636:	learn: 0.0007902	total: 1.27s	remaining: 725ms
637:	learn: 0.0007902	total: 1.27s	remaining: 723ms
638:	learn: 0.0007902	total: 1.27s	remaining: 721ms
639:	learn: 0.0007902	total: 1.28s	remaining: 718ms
640:	learn: 0.0007902	total: 1.28s	remaining: 716ms
641:	learn: 0.0007902	total: 1.28s	remaining: 714ms
642:	learn: 0.0007902	total: 1.28s	remaining: 712ms
643:	learn: 0.0007902	total: 1.28s	remaining: 710ms
644:	learn: 0.0007902	total: 1.28s	remaining: 708ms
645:	learn: 0.0007902	total: 1.29s	remaining: 706ms
646:	learn: 0.0007902	total: 1.29s	remaining: 704ms
647:	learn: 0.0007902	total: 1.29s	remaining: 701ms
648:	learn: 0.0007902	total: 1.29s	remaining: 699ms
649:	learn: 0.0007902	total: 1.29s	remaining: 697ms
650:	learn: 0.0007902	total: 1.3s	remaining: 695ms
651:	learn: 0.0007902	total: 1.3s	remaining: 693ms
652:	learn: 0.0007901	total: 1.3s	remaining: 691ms
653:	learn: 0.0007901	total: 1.3s	remaining: 688ms
654:	learn: 0.0007902	total: 1.3s	remaining: 686ms
655:	learn: 0.0007901	total: 1.3s	remaining: 684ms
656:	learn: 0.0007901	total: 1.31s	remaining: 682ms
657:	learn: 0.0007901	total: 1.31s	remaining: 680ms
658:	learn: 0.0007901	total: 1.31s	remaining: 678ms
659:	learn: 0.0007901	total: 1.31s	remaining: 676ms
660:	learn: 0.0007901	total: 1.31s	remaining: 674ms
661:	learn: 0.0007901	total: 1.31s	remaining: 671ms
662:	learn: 0.0007901	total: 1.32s	remaining: 669ms
663:	learn: 0.0007901	total: 1.32s	remaining: 667ms
664:	learn: 0.0007901	total: 1.32s	remaining: 665ms
665:	learn: 0.0007901	total: 1.32s	remaining: 663ms
666:	learn: 0.0007901	total: 1.32s	remaining: 661ms
667:	learn: 0.0007901	total: 1.32s	remaining: 659ms
668:	learn: 0.0007901	total: 1.33s	remaining: 657ms
669:	learn: 0.0007901	total: 1.33s	remaining: 654ms
670:	learn: 0.0007901	total: 1.33s	remaining: 652ms
671:	learn: 0.0007901	total: 1.33s	remaining: 650ms
672:	learn: 0.0007901	total: 1.33s	remaining: 648ms
673:	learn: 0.0007901	total: 1.33s	remaining: 646ms
674:	learn: 0.0007901	total: 1.34s	remaining: 644ms
675:	learn: 0.0007901	total: 1.34s	remaining: 642ms
676:	learn: 0.0007901	total: 1.34s	remaining: 640ms
677:	learn: 0.0007901	total: 1.34s	remaining: 638ms
678:	learn: 0.0007901	total: 1.34s	remaining: 635ms
679:	learn: 0.0007901	total: 1.34s	remaining: 633ms
680:	learn: 0.0007901	total: 1.35s	remaining: 631ms
681:	learn: 0.0007901	total: 1.35s	remaining: 629ms
682:	learn: 0.0007901	total: 1.35s	remaining: 627ms
683:	learn: 0.0007901	total: 1.35s	remaining: 625ms
684:	learn: 0.0007900	total: 1.35s	remaining: 623ms
685:	learn: 0.0007901	total: 1.36s	remaining: 621ms
686:	learn: 0.0007900	total: 1.36s	remaining: 619ms
687:	learn: 0.0007900	total: 1.36s	remaining: 617ms
688:	learn: 0.0007900	total: 1.36s	remaining: 615ms
689:	learn: 0.0007900	total: 1.36s	remaining: 613ms
690:	learn: 0.0007900	total: 1.36s	remaining: 610ms
691:	learn: 0.0007900	total: 1.37s	remaining: 608ms
692:	learn: 0.0007900	total: 1.37s	remaining: 606ms
693:	learn: 0.0007900	total: 1.37s	remaining: 604ms
694:	learn: 0.0007900	total: 1.37s	remaining: 602ms
695:	learn: 0.0007900	total: 1.37s	remaining: 600ms
696:	learn: 0.0007900	total: 1.37s	remaining: 598ms
697:	learn: 0.0007900	total: 1.38s	remaining: 596ms
698:	learn: 0.0007900	total: 1.38s	remaining: 593ms
699:	learn: 0.0007900	total: 1.38s	remaining: 591ms
700:	learn: 0.0007900	total: 1.38s	remaining: 589ms
701:	learn: 0.0007900	total: 1.38s	remaining: 587ms
702:	learn: 0.0007900	total: 1.38s	remaining: 585ms
703:	learn: 0.0007900	total: 1.39s	remaining: 583ms
704:	learn: 0.0007900	total: 1.39s	remaining: 581ms
705:	learn: 0.0007900	total: 1.39s	remaining: 579ms
706:	learn: 0.0007900	total: 1.39s	remaining: 577ms
707:	learn: 0.0007900	total: 1.39s	remaining: 574ms
708:	learn: 0.0007900	total: 1.39s	remaining: 572ms
709:	learn: 0.0007900	total: 1.4s	remaining: 570ms
710:	learn: 0.0007899	total: 1.4s	remaining: 568ms
711:	learn: 0.0007899	total: 1.4s	remaining: 566ms
712:	learn: 0.0007900	total: 1.4s	remaining: 564ms
713:	learn: 0.0007899	total: 1.4s	remaining: 562ms
714:	learn: 0.0007899	total: 1.4s	remaining: 560ms
715:	learn: 0.0007899	total: 1.41s	remaining: 558ms
716:	learn: 0.0007899	total: 1.41s	remaining: 556ms
717:	learn: 0.0007899	total: 1.41s	remaining: 554ms
718:	learn: 0.0007899	total: 1.41s	remaining: 552ms
719:	learn: 0.0007899	total: 1.41s	remaining: 550ms
720:	learn: 0.0007899	total: 1.42s	remaining: 548ms
721:	learn: 0.0007899	total: 1.42s	remaining: 545ms
722:	learn: 0.0007899	total: 1.42s	remaining: 543ms
723:	learn: 0.0007899	total: 1.42s	remaining: 541ms
724:	learn: 0.0007899	total: 1.42s	remaining: 539ms
725:	learn: 0.0007899	total: 1.42s	remaining: 537ms
726:	learn: 0.0007899	total: 1.43s	remaining: 535ms
727:	learn: 0.0007899	total: 1.43s	remaining: 533ms
728:	learn: 0.0007899	total: 1.43s	remaining: 531ms
729:	learn: 0.0007899	total: 1.43s	remaining: 529ms
730:	learn: 0.0007899	total: 1.43s	remaining: 527ms
731:	learn: 0.0007899	total: 1.43s	remaining: 525ms
732:	learn: 0.0007899	total: 1.44s	remaining: 523ms
733:	learn: 0.0007899	total: 1.44s	remaining: 521ms
734:	learn: 0.0007899	total: 1.44s	remaining: 519ms
735:	learn: 0.0007899	total: 1.44s	remaining: 517ms
736:	learn: 0.0007898	total: 1.44s	remaining: 515ms
737:	learn: 0.0007899	total: 1.44s	remaining: 513ms
738:	learn: 0.0007899	total: 1.45s	remaining: 511ms
739:	learn: 0.0007898	total: 1.45s	remaining: 509ms
740:	learn: 0.0007898	total: 1.45s	remaining: 507ms
741:	learn: 0.0007899	total: 1.45s	remaining: 505ms
742:	learn: 0.0007899	total: 1.45s	remaining: 502ms
743:	learn: 0.0007898	total: 1.45s	remaining: 500ms
744:	learn: 0.0007898	total: 1.46s	remaining: 498ms
745:	learn: 0.0007898	total: 1.46s	remaining: 496ms
746:	learn: 0.0007898	total: 1.46s	remaining: 494ms
747:	learn: 0.0007898	total: 1.46s	remaining: 492ms
748:	learn: 0.0007898	total: 1.46s	remaining: 490ms
749:	learn: 0.0007898	total: 1.46s	remaining: 488ms
750:	learn: 0.0007898	total: 1.47s	remaining: 486ms
751:	learn: 0.0007898	total: 1.47s	remaining: 484ms
752:	learn: 0.0007898	total: 1.47s	remaining: 482ms
753:	learn: 0.0007898	total: 1.47s	remaining: 480ms
754:	learn: 0.0007898	total: 1.47s	remaining: 478ms
755:	learn: 0.0007898	total: 1.47s	remaining: 476ms
756:	learn: 0.0007898	total: 1.48s	remaining: 474ms
757:	learn: 0.0007898	total: 1.48s	remaining: 472ms
758:	learn: 0.0007898	total: 1.48s	remaining: 470ms
759:	learn: 0.0007898	total: 1.48s	remaining: 468ms
760:	learn: 0.0007897	total: 1.48s	remaining: 466ms
761:	learn: 0.0007897	total: 1.48s	remaining: 463ms
762:	learn: 0.0007897	total: 1.49s	remaining: 461ms
763:	learn: 0.0007897	total: 1.49s	remaining: 459ms
764:	learn: 0.0007897	total: 1.49s	remaining: 457ms
765:	learn: 0.0007897	total: 1.49s	remaining: 455ms
766:	learn: 0.0007897	total: 1.49s	remaining: 453ms
767:	learn: 0.0007897	total: 1.49s	remaining: 451ms
768:	learn: 0.0007897	total: 1.5s	remaining: 449ms
769:	learn: 0.0007897	total: 1.5s	remaining: 447ms
770:	learn: 0.0007897	total: 1.5s	remaining: 445ms
771:	learn: 0.0007897	total: 1.5s	remaining: 443ms
772:	learn: 0.0007897	total: 1.5s	remaining: 441ms
773:	learn: 0.0007897	total: 1.5s	remaining: 439ms
774:	learn: 0.0007897	total: 1.51s	remaining: 437ms
775:	learn: 0.0007897	total: 1.51s	remaining: 435ms
776:	learn: 0.0007897	total: 1.51s	remaining: 433ms
777:	learn: 0.0007897	total: 1.51s	remaining: 431ms
778:	learn: 0.0007897	total: 1.51s	remaining: 429ms
779:	learn: 0.0007897	total: 1.51s	remaining: 427ms
780:	learn: 0.0007897	total: 1.52s	remaining: 425ms
781:	learn: 0.0007897	total: 1.52s	remaining: 423ms
782:	learn: 0.0007897	total: 1.52s	remaining: 421ms
783:	learn: 0.0007897	total: 1.52s	remaining: 419ms
784:	learn: 0.0007897	total: 1.52s	remaining: 417ms
785:	learn: 0.0007896	total: 1.52s	remaining: 415ms
786:	learn: 0.0007896	total: 1.53s	remaining: 413ms
787:	learn: 0.0007896	total: 1.53s	remaining: 411ms
788:	learn: 0.0007896	total: 1.53s	remaining: 409ms
789:	learn: 0.0007896	total: 1.53s	remaining: 407ms
790:	learn: 0.0007896	total: 1.53s	remaining: 405ms
791:	learn: 0.0007896	total: 1.53s	remaining: 403ms
792:	learn: 0.0007896	total: 1.53s	remaining: 401ms
793:	learn: 0.0007896	total: 1.54s	remaining: 399ms
794:	learn: 0.0007896	total: 1.54s	remaining: 397ms
795:	learn: 0.0007896	total: 1.54s	remaining: 395ms
796:	learn: 0.0007896	total: 1.54s	remaining: 393ms
797:	learn: 0.0007896	total: 1.54s	remaining: 391ms
798:	learn: 0.0007896	total: 1.54s	remaining: 389ms
799:	learn: 0.0007896	total: 1.55s	remaining: 387ms
800:	learn: 0.0007896	total: 1.55s	remaining: 385ms
801:	learn: 0.0007896	total: 1.55s	remaining: 383ms
802:	learn: 0.0007896	total: 1.55s	remaining: 381ms
803:	learn: 0.0007896	total: 1.55s	remaining: 379ms
804:	learn: 0.0007896	total: 1.55s	remaining: 377ms
805:	learn: 0.0007896	total: 1.56s	remaining: 375ms
806:	learn: 0.0007896	total: 1.56s	remaining: 373ms
807:	learn: 0.0007895	total: 1.56s	remaining: 371ms
808:	learn: 0.0007895	total: 1.56s	remaining: 369ms
809:	learn: 0.0007895	total: 1.56s	remaining: 367ms
810:	learn: 0.0007895	total: 1.56s	remaining: 365ms
811:	learn: 0.0007896	total: 1.57s	remaining: 363ms
812:	learn: 0.0007895	total: 1.57s	remaining: 361ms
813:	learn: 0.0007895	total: 1.57s	remaining: 359ms
814:	learn: 0.0007895	total: 1.57s	remaining: 357ms
815:	learn: 0.0007895	total: 1.57s	remaining: 355ms
816:	learn: 0.0007895	total: 1.57s	remaining: 353ms
817:	learn: 0.0007895	total: 1.58s	remaining: 351ms
818:	learn: 0.0007895	total: 1.58s	remaining: 349ms
819:	learn: 0.0007895	total: 1.58s	remaining: 347ms
820:	learn: 0.0007895	total: 1.58s	remaining: 345ms
821:	learn: 0.0007895	total: 1.58s	remaining: 343ms
822:	learn: 0.0007895	total: 1.58s	remaining: 341ms
823:	learn: 0.0007895	total: 1.59s	remaining: 339ms
824:	learn: 0.0007895	total: 1.59s	remaining: 337ms
825:	learn: 0.0007895	total: 1.59s	remaining: 335ms
826:	learn: 0.0007895	total: 1.59s	remaining: 333ms
827:	learn: 0.0007895	total: 1.59s	remaining: 331ms
828:	learn: 0.0007895	total: 1.6s	remaining: 329ms
829:	learn: 0.0007895	total: 1.6s	remaining: 327ms
830:	learn: 0.0007895	total: 1.6s	remaining: 325ms
831:	learn: 0.0007894	total: 1.6s	remaining: 323ms
832:	learn: 0.0007894	total: 1.6s	remaining: 321ms
833:	learn: 0.0007894	total: 1.6s	remaining: 319ms
834:	learn: 0.0007894	total: 1.61s	remaining: 317ms
835:	learn: 0.0007894	total: 1.61s	remaining: 315ms
836:	learn: 0.0007894	total: 1.61s	remaining: 313ms
837:	learn: 0.0007894	total: 1.61s	remaining: 312ms
838:	learn: 0.0007894	total: 1.61s	remaining: 310ms
839:	learn: 0.0007894	total: 1.61s	remaining: 308ms
840:	learn: 0.0007894	total: 1.62s	remaining: 306ms
841:	learn: 0.0007894	total: 1.62s	remaining: 304ms
842:	learn: 0.0007894	total: 1.62s	remaining: 302ms
843:	learn: 0.0007894	total: 1.62s	remaining: 300ms
844:	learn: 0.0007894	total: 1.62s	remaining: 298ms
845:	learn: 0.0007894	total: 1.63s	remaining: 296ms
846:	learn: 0.0007894	total: 1.63s	remaining: 294ms
847:	learn: 0.0007894	total: 1.63s	remaining: 292ms
848:	learn: 0.0007894	total: 1.63s	remaining: 290ms
849:	learn: 0.0007894	total: 1.63s	remaining: 288ms
850:	learn: 0.0007894	total: 1.63s	remaining: 286ms
851:	learn: 0.0007894	total: 1.64s	remaining: 284ms
852:	learn: 0.0007894	total: 1.64s	remaining: 282ms
853:	learn: 0.0007894	total: 1.64s	remaining: 280ms
854:	learn: 0.0007894	total: 1.64s	remaining: 278ms
855:	learn: 0.0007894	total: 1.64s	remaining: 276ms
856:	learn: 0.0007893	total: 1.64s	remaining: 274ms
857:	learn: 0.0007893	total: 1.65s	remaining: 272ms
858:	learn: 0.0007893	total: 1.65s	remaining: 270ms
859:	learn: 0.0007893	total: 1.65s	remaining: 268ms
860:	learn: 0.0007893	total: 1.65s	remaining: 266ms
861:	learn: 0.0007893	total: 1.65s	remaining: 265ms
862:	learn: 0.0007893	total: 1.65s	remaining: 263ms
863:	learn: 0.0007893	total: 1.66s	remaining: 261ms
864:	learn: 0.0007893	total: 1.66s	remaining: 259ms
865:	learn: 0.0007893	total: 1.66s	remaining: 257ms
866:	learn: 0.0007893	total: 1.66s	remaining: 255ms
867:	learn: 0.0007893	total: 1.66s	remaining: 253ms
868:	learn: 0.0007893	total: 1.66s	remaining: 251ms
869:	learn: 0.0007893	total: 1.67s	remaining: 249ms
870:	learn: 0.0007893	total: 1.67s	remaining: 247ms
871:	learn: 0.0007893	total: 1.67s	remaining: 245ms
872:	learn: 0.0007892	total: 1.67s	remaining: 243ms
873:	learn: 0.0007893	total: 1.67s	remaining: 241ms
874:	learn: 0.0007892	total: 1.67s	remaining: 239ms
875:	learn: 0.0007892	total: 1.68s	remaining: 237ms
876:	learn: 0.0007892	total: 1.68s	remaining: 235ms
877:	learn: 0.0007892	total: 1.68s	remaining: 233ms
878:	learn: 0.0007892	total: 1.68s	remaining: 231ms
879:	learn: 0.0007892	total: 1.68s	remaining: 229ms
880:	learn: 0.0007892	total: 1.68s	remaining: 228ms
881:	learn: 0.0007892	total: 1.69s	remaining: 226ms
882:	learn: 0.0007892	total: 1.69s	remaining: 224ms
883:	learn: 0.0007892	total: 1.69s	remaining: 222ms
884:	learn: 0.0007892	total: 1.69s	remaining: 220ms
885:	learn: 0.0007892	total: 1.69s	remaining: 218ms
886:	learn: 0.0007892	total: 1.69s	remaining: 216ms
887:	learn: 0.0007892	total: 1.7s	remaining: 214ms
888:	learn: 0.0007892	total: 1.7s	remaining: 212ms
889:	learn: 0.0007892	total: 1.7s	remaining: 210ms
890:	learn: 0.0007892	total: 1.7s	remaining: 208ms
891:	learn: 0.0007892	total: 1.7s	remaining: 206ms
892:	learn: 0.0007892	total: 1.7s	remaining: 204ms
893:	learn: 0.0007892	total: 1.71s	remaining: 202ms
894:	learn: 0.0007892	total: 1.71s	remaining: 200ms
895:	learn: 0.0007892	total: 1.71s	remaining: 198ms
896:	learn: 0.0007892	total: 1.71s	remaining: 197ms
897:	learn: 0.0007892	total: 1.71s	remaining: 195ms
898:	learn: 0.0007891	total: 1.71s	remaining: 193ms
899:	learn: 0.0007892	total: 1.72s	remaining: 191ms
900:	learn: 0.0007892	total: 1.72s	remaining: 189ms
901:	learn: 0.0007891	total: 1.72s	remaining: 187ms
902:	learn: 0.0007891	total: 1.72s	remaining: 185ms
903:	learn: 0.0007891	total: 1.72s	remaining: 183ms
904:	learn: 0.0007891	total: 1.72s	remaining: 181ms
905:	learn: 0.0007891	total: 1.73s	remaining: 179ms
906:	learn: 0.0007891	total: 1.73s	remaining: 177ms
907:	learn: 0.0007891	total: 1.73s	remaining: 175ms
908:	learn: 0.0007891	total: 1.73s	remaining: 173ms
909:	learn: 0.0007891	total: 1.73s	remaining: 171ms
910:	learn: 0.0007891	total: 1.73s	remaining: 169ms
911:	learn: 0.0007891	total: 1.74s	remaining: 168ms
912:	learn: 0.0007891	total: 1.74s	remaining: 166ms
913:	learn: 0.0007891	total: 1.74s	remaining: 164ms
914:	learn: 0.0007891	total: 1.74s	remaining: 162ms
915:	learn: 0.0007891	total: 1.74s	remaining: 160ms
916:	learn: 0.0007891	total: 1.74s	remaining: 158ms
917:	learn: 0.0007891	total: 1.75s	remaining: 156ms
918:	learn: 0.0007891	total: 1.75s	remaining: 154ms
919:	learn: 0.0007891	total: 1.75s	remaining: 152ms
920:	learn: 0.0007891	total: 1.75s	remaining: 150ms
921:	learn: 0.0007891	total: 1.75s	remaining: 148ms
922:	learn: 0.0007891	total: 1.75s	remaining: 146ms
923:	learn: 0.0007891	total: 1.76s	remaining: 144ms
924:	learn: 0.0007891	total: 1.76s	remaining: 143ms
925:	learn: 0.0007891	total: 1.76s	remaining: 141ms
926:	learn: 0.0007891	total: 1.76s	remaining: 139ms
927:	learn: 0.0007891	total: 1.76s	remaining: 137ms
928:	learn: 0.0007891	total: 1.76s	remaining: 135ms
929:	learn: 0.0007891	total: 1.77s	remaining: 133ms
930:	learn: 0.0007891	total: 1.77s	remaining: 131ms
931:	learn: 0.0007891	total: 1.77s	remaining: 129ms
932:	learn: 0.0007890	total: 1.77s	remaining: 127ms
933:	learn: 0.0007890	total: 1.77s	remaining: 125ms
934:	learn: 0.0007890	total: 1.77s	remaining: 123ms
935:	learn: 0.0007890	total: 1.78s	remaining: 121ms
936:	learn: 0.0007890	total: 1.78s	remaining: 120ms
937:	learn: 0.0007890	total: 1.78s	remaining: 118ms
938:	learn: 0.0007890	total: 1.78s	remaining: 116ms
939:	learn: 0.0007890	total: 1.78s	remaining: 114ms
940:	learn: 0.0007890	total: 1.78s	remaining: 112ms
941:	learn: 0.0007890	total: 1.79s	remaining: 110ms
942:	learn: 0.0007890	total: 1.79s	remaining: 108ms
943:	learn: 0.0007890	total: 1.79s	remaining: 106ms
944:	learn: 0.0007890	total: 1.79s	remaining: 104ms
945:	learn: 0.0007890	total: 1.79s	remaining: 102ms
946:	learn: 0.0007890	total: 1.79s	remaining: 100ms
947:	learn: 0.0007890	total: 1.8s	remaining: 98.6ms
948:	learn: 0.0007890	total: 1.8s	remaining: 96.7ms
949:	learn: 0.0007890	total: 1.8s	remaining: 94.8ms
950:	learn: 0.0007890	total: 1.8s	remaining: 92.9ms
951:	learn: 0.0007890	total: 1.8s	remaining: 91ms
952:	learn: 0.0007890	total: 1.8s	remaining: 89.1ms
953:	learn: 0.0007890	total: 1.81s	remaining: 87.1ms
954:	learn: 0.0007890	total: 1.81s	remaining: 85.3ms
955:	learn: 0.0007890	total: 1.81s	remaining: 83.3ms
956:	learn: 0.0007890	total: 1.81s	remaining: 81.4ms
957:	learn: 0.0007889	total: 1.81s	remaining: 79.5ms
958:	learn: 0.0007890	total: 1.82s	remaining: 77.6ms
959:	learn: 0.0007890	total: 1.82s	remaining: 75.7ms
960:	learn: 0.0007890	total: 1.82s	remaining: 73.8ms
961:	learn: 0.0007889	total: 1.82s	remaining: 71.9ms
962:	learn: 0.0007890	total: 1.82s	remaining: 70ms
963:	learn: 0.0007890	total: 1.82s	remaining: 68.1ms
964:	learn: 0.0007889	total: 1.83s	remaining: 66.3ms
965:	learn: 0.0007889	total: 1.83s	remaining: 64.4ms
966:	learn: 0.0007889	total: 1.83s	remaining: 62.5ms
967:	learn: 0.0007889	total: 1.83s	remaining: 60.6ms
968:	learn: 0.0007889	total: 1.83s	remaining: 58.7ms
969:	learn: 0.0007889	total: 1.83s	remaining: 56.8ms
970:	learn: 0.0007889	total: 1.84s	remaining: 54.9ms
971:	learn: 0.0007889	total: 1.84s	remaining: 53ms
972:	learn: 0.0007889	total: 1.84s	remaining: 51.1ms
973:	learn: 0.0007889	total: 1.84s	remaining: 49.2ms
974:	learn: 0.0007889	total: 1.84s	remaining: 47.3ms
975:	learn: 0.0007889	total: 1.84s	remaining: 45.4ms
976:	learn: 0.0007889	total: 1.85s	remaining: 43.5ms
977:	learn: 0.0007889	total: 1.85s	remaining: 41.6ms
978:	learn: 0.0007889	total: 1.85s	remaining: 39.7ms
979:	learn: 0.0007889	total: 1.85s	remaining: 37.8ms
980:	learn: 0.0007889	total: 1.85s	remaining: 35.9ms
981:	learn: 0.0007889	total: 1.85s	remaining: 34ms
982:	learn: 0.0007889	total: 1.86s	remaining: 32.1ms
983:	learn: 0.0007889	total: 1.86s	remaining: 30.2ms
984:	learn: 0.0007889	total: 1.86s	remaining: 28.3ms
985:	learn: 0.0007889	total: 1.86s	remaining: 26.4ms
986:	learn: 0.0007889	total: 1.86s	remaining: 24.5ms
987:	learn: 0.0007889	total: 1.86s	remaining: 22.7ms
988:	learn: 0.0007889	total: 1.87s	remaining: 20.8ms
989:	learn: 0.0007889	total: 1.87s	remaining: 18.9ms
990:	learn: 0.0007889	total: 1.87s	remaining: 17ms
991:	learn: 0.0007889	total: 1.87s	remaining: 15.1ms
992:	learn: 0.0007888	total: 1.87s	remaining: 13.2ms
993:	learn: 0.0007889	total: 1.88s	remaining: 11.3ms
994:	learn: 0.0007889	total: 1.88s	remaining: 9.43ms
995:	learn: 0.0007888	total: 1.88s	remaining: 7.54ms
996:	learn: 0.0007888	total: 1.88s	remaining: 5.66ms
997:	learn: 0.0007888	total: 1.88s	remaining: 3.77ms
998:	learn: 0.0007888	total: 1.88s	remaining: 1.89ms
999:	learn: 0.0007888	total: 1.89s	remaining: 0us
In [381]:
y_pred = cat_model.predict(X_test)
accuracy_score(y_test, y_pred)
Out[381]:
1.0
In [383]:
catb_params = {
    'iterations': [200,500],
    'learning_rate': [0.01,0.05, 0.1],
    'depth': [3,5,8] }
In [385]:
catb = CatBoostClassifier()
catb_cv_model = GridSearchCV(catb, catb_params, cv=5, n_jobs = -1, verbose = 2)
catb_cv_model.fit(X_train, y_train)
catb_cv_model.best_params_
Fitting 5 folds for each of 18 candidates, totalling 90 fits
0:	learn: 0.6595468	total: 1.28ms	remaining: 256ms
1:	learn: 0.6259209	total: 2.59ms	remaining: 257ms
2:	learn: 0.5941751	total: 3.8ms	remaining: 250ms
3:	learn: 0.5632290	total: 4.91ms	remaining: 241ms
4:	learn: 0.5366301	total: 6.11ms	remaining: 238ms
5:	learn: 0.5097550	total: 7.29ms	remaining: 236ms
6:	learn: 0.4841281	total: 8.51ms	remaining: 235ms
7:	learn: 0.4569797	total: 9.53ms	remaining: 229ms
8:	learn: 0.4344650	total: 10.7ms	remaining: 227ms
9:	learn: 0.4130139	total: 12ms	remaining: 227ms
10:	learn: 0.3899785	total: 13.1ms	remaining: 226ms
11:	learn: 0.3708218	total: 14.3ms	remaining: 225ms
12:	learn: 0.3524314	total: 15.5ms	remaining: 223ms
13:	learn: 0.3349809	total: 16.7ms	remaining: 222ms
14:	learn: 0.3169636	total: 17.9ms	remaining: 221ms
15:	learn: 0.3002906	total: 19ms	remaining: 219ms
16:	learn: 0.2854770	total: 20.2ms	remaining: 217ms
17:	learn: 0.2713017	total: 21.4ms	remaining: 216ms
18:	learn: 0.2579750	total: 22.6ms	remaining: 215ms
19:	learn: 0.2449128	total: 23.8ms	remaining: 214ms
20:	learn: 0.2325563	total: 25ms	remaining: 213ms
21:	learn: 0.2213074	total: 26.2ms	remaining: 212ms
22:	learn: 0.2105813	total: 27.4ms	remaining: 211ms
23:	learn: 0.2004724	total: 28.7ms	remaining: 211ms
24:	learn: 0.1908160	total: 29.9ms	remaining: 210ms
25:	learn: 0.1808626	total: 31.2ms	remaining: 209ms
26:	learn: 0.1722519	total: 32.4ms	remaining: 208ms
27:	learn: 0.1638434	total: 33.7ms	remaining: 207ms
28:	learn: 0.1554835	total: 34.9ms	remaining: 206ms
29:	learn: 0.1477724	total: 36.1ms	remaining: 205ms
30:	learn: 0.1404945	total: 37.3ms	remaining: 203ms
31:	learn: 0.1335736	total: 38.5ms	remaining: 202ms
32:	learn: 0.1269453	total: 39.7ms	remaining: 201ms
33:	learn: 0.1208221	total: 40.9ms	remaining: 200ms
34:	learn: 0.1153437	total: 42.2ms	remaining: 199ms
35:	learn: 0.1098383	total: 43.3ms	remaining: 197ms
36:	learn: 0.1048613	total: 44.7ms	remaining: 197ms
37:	learn: 0.1001649	total: 45.8ms	remaining: 195ms
38:	learn: 0.0956594	total: 47ms	remaining: 194ms
39:	learn: 0.0914333	total: 48.3ms	remaining: 193ms
40:	learn: 0.0874844	total: 49.5ms	remaining: 192ms
41:	learn: 0.0836310	total: 50.7ms	remaining: 191ms
42:	learn: 0.0800624	total: 51.9ms	remaining: 190ms
43:	learn: 0.0768335	total: 53.2ms	remaining: 188ms
44:	learn: 0.0734384	total: 54.4ms	remaining: 187ms
45:	learn: 0.0703208	total: 55.5ms	remaining: 186ms
46:	learn: 0.0672899	total: 56.7ms	remaining: 184ms
47:	learn: 0.0643721	total: 57.9ms	remaining: 183ms
48:	learn: 0.0615388	total: 59.2ms	remaining: 182ms
49:	learn: 0.0590989	total: 60.4ms	remaining: 181ms
50:	learn: 0.0567096	total: 61.6ms	remaining: 180ms
51:	learn: 0.0544305	total: 62.8ms	remaining: 179ms
52:	learn: 0.0523452	total: 64ms	remaining: 177ms
53:	learn: 0.0498063	total: 65.1ms	remaining: 176ms
54:	learn: 0.0476854	total: 66.2ms	remaining: 175ms
55:	learn: 0.0458659	total: 67.5ms	remaining: 174ms
56:	learn: 0.0439035	total: 68.8ms	remaining: 173ms
57:	learn: 0.0421772	total: 70.1ms	remaining: 172ms
58:	learn: 0.0403579	total: 71.4ms	remaining: 171ms
59:	learn: 0.0387628	total: 72.6ms	remaining: 169ms
60:	learn: 0.0371524	total: 73.8ms	remaining: 168ms
61:	learn: 0.0356815	total: 75.2ms	remaining: 167ms
62:	learn: 0.0343486	total: 76.5ms	remaining: 166ms
63:	learn: 0.0329325	total: 77.7ms	remaining: 165ms
64:	learn: 0.0315918	total: 79ms	remaining: 164ms
65:	learn: 0.0304244	total: 80.3ms	remaining: 163ms
66:	learn: 0.0292175	total: 81.6ms	remaining: 162ms
67:	learn: 0.0282400	total: 82.7ms	remaining: 161ms
68:	learn: 0.0272602	total: 84ms	remaining: 159ms
69:	learn: 0.0262688	total: 85.2ms	remaining: 158ms
70:	learn: 0.0254206	total: 86.3ms	remaining: 157ms
71:	learn: 0.0245386	total: 87.4ms	remaining: 155ms
72:	learn: 0.0236300	total: 88.6ms	remaining: 154ms
73:	learn: 0.0228265	total: 89.8ms	remaining: 153ms
74:	learn: 0.0221196	total: 91.1ms	remaining: 152ms
75:	learn: 0.0213787	total: 92.3ms	remaining: 151ms
76:	learn: 0.0204883	total: 93.4ms	remaining: 149ms
77:	learn: 0.0198340	total: 94.6ms	remaining: 148ms
78:	learn: 0.0190185	total: 95.7ms	remaining: 147ms
79:	learn: 0.0184044	total: 96.9ms	remaining: 145ms
80:	learn: 0.0178198	total: 98ms	remaining: 144ms
81:	learn: 0.0172466	total: 99.2ms	remaining: 143ms
82:	learn: 0.0167424	total: 100ms	remaining: 141ms
83:	learn: 0.0162306	total: 102ms	remaining: 140ms
84:	learn: 0.0156537	total: 103ms	remaining: 139ms
85:	learn: 0.0151420	total: 104ms	remaining: 138ms
86:	learn: 0.0146110	total: 105ms	remaining: 137ms
87:	learn: 0.0140455	total: 106ms	remaining: 135ms
88:	learn: 0.0135373	total: 107ms	remaining: 134ms
89:	learn: 0.0131767	total: 109ms	remaining: 133ms
90:	learn: 0.0127442	total: 110ms	remaining: 132ms
91:	learn: 0.0123519	total: 111ms	remaining: 130ms
92:	learn: 0.0120279	total: 112ms	remaining: 129ms
93:	learn: 0.0116475	total: 113ms	remaining: 128ms
94:	learn: 0.0112956	total: 115ms	remaining: 127ms
95:	learn: 0.0109911	total: 116ms	remaining: 126ms
96:	learn: 0.0106855	total: 117ms	remaining: 124ms
97:	learn: 0.0103866	total: 118ms	remaining: 123ms
98:	learn: 0.0100137	total: 119ms	remaining: 122ms
99:	learn: 0.0096872	total: 121ms	remaining: 121ms
100:	learn: 0.0094532	total: 122ms	remaining: 119ms
101:	learn: 0.0092026	total: 123ms	remaining: 118ms
102:	learn: 0.0089828	total: 124ms	remaining: 117ms
103:	learn: 0.0087387	total: 126ms	remaining: 116ms
104:	learn: 0.0085131	total: 127ms	remaining: 115ms
105:	learn: 0.0082750	total: 128ms	remaining: 114ms
106:	learn: 0.0080488	total: 129ms	remaining: 112ms
107:	learn: 0.0078430	total: 131ms	remaining: 111ms
108:	learn: 0.0076567	total: 132ms	remaining: 110ms
109:	learn: 0.0074510	total: 133ms	remaining: 109ms
110:	learn: 0.0072476	total: 134ms	remaining: 108ms
111:	learn: 0.0070855	total: 135ms	remaining: 106ms
112:	learn: 0.0069208	total: 136ms	remaining: 105ms
113:	learn: 0.0067316	total: 138ms	remaining: 104ms
114:	learn: 0.0065632	total: 139ms	remaining: 103ms
115:	learn: 0.0064088	total: 140ms	remaining: 101ms
116:	learn: 0.0062443	total: 141ms	remaining: 100ms
117:	learn: 0.0061037	total: 142ms	remaining: 99ms
118:	learn: 0.0059522	total: 144ms	remaining: 97.7ms
119:	learn: 0.0058117	total: 145ms	remaining: 96.5ms
120:	learn: 0.0056876	total: 146ms	remaining: 95.3ms
121:	learn: 0.0055594	total: 147ms	remaining: 94ms
122:	learn: 0.0054362	total: 148ms	remaining: 92.8ms
123:	learn: 0.0052925	total: 149ms	remaining: 91.6ms
124:	learn: 0.0051599	total: 151ms	remaining: 90.3ms
125:	learn: 0.0050524	total: 152ms	remaining: 89.1ms
126:	learn: 0.0049303	total: 153ms	remaining: 88ms
127:	learn: 0.0048181	total: 154ms	remaining: 86.8ms
128:	learn: 0.0047235	total: 156ms	remaining: 85.6ms
129:	learn: 0.0046187	total: 157ms	remaining: 84.4ms
130:	learn: 0.0045162	total: 158ms	remaining: 83.2ms
131:	learn: 0.0044202	total: 159ms	remaining: 82.1ms
132:	learn: 0.0043386	total: 161ms	remaining: 80.9ms
133:	learn: 0.0042540	total: 162ms	remaining: 79.7ms
134:	learn: 0.0041706	total: 163ms	remaining: 78.5ms
135:	learn: 0.0040881	total: 164ms	remaining: 77.3ms
136:	learn: 0.0040048	total: 165ms	remaining: 76.1ms
137:	learn: 0.0039251	total: 167ms	remaining: 74.9ms
138:	learn: 0.0038349	total: 168ms	remaining: 73.7ms
139:	learn: 0.0037611	total: 169ms	remaining: 72.5ms
140:	learn: 0.0036952	total: 170ms	remaining: 71.3ms
141:	learn: 0.0036313	total: 172ms	remaining: 70.1ms
142:	learn: 0.0035579	total: 173ms	remaining: 68.9ms
143:	learn: 0.0034892	total: 174ms	remaining: 67.7ms
144:	learn: 0.0034148	total: 175ms	remaining: 66.5ms
145:	learn: 0.0033443	total: 176ms	remaining: 65.3ms
146:	learn: 0.0032847	total: 178ms	remaining: 64ms
147:	learn: 0.0032149	total: 179ms	remaining: 62.8ms
148:	learn: 0.0031560	total: 180ms	remaining: 61.6ms
149:	learn: 0.0031024	total: 181ms	remaining: 60.4ms
150:	learn: 0.0030363	total: 182ms	remaining: 59.2ms
151:	learn: 0.0029826	total: 184ms	remaining: 58ms
152:	learn: 0.0029267	total: 185ms	remaining: 56.8ms
153:	learn: 0.0028785	total: 186ms	remaining: 55.6ms
154:	learn: 0.0028146	total: 187ms	remaining: 54.4ms
155:	learn: 0.0027677	total: 189ms	remaining: 53.2ms
156:	learn: 0.0027276	total: 190ms	remaining: 52ms
157:	learn: 0.0026715	total: 191ms	remaining: 50.7ms
158:	learn: 0.0026246	total: 192ms	remaining: 49.5ms
159:	learn: 0.0025827	total: 193ms	remaining: 48.3ms
160:	learn: 0.0025274	total: 194ms	remaining: 47.1ms
161:	learn: 0.0024871	total: 196ms	remaining: 45.9ms
162:	learn: 0.0024466	total: 197ms	remaining: 44.7ms
163:	learn: 0.0023988	total: 198ms	remaining: 43.4ms
164:	learn: 0.0023560	total: 199ms	remaining: 42.2ms
165:	learn: 0.0023219	total: 200ms	remaining: 41ms
166:	learn: 0.0022850	total: 201ms	remaining: 39.8ms
167:	learn: 0.0022475	total: 203ms	remaining: 38.6ms
168:	learn: 0.0022118	total: 204ms	remaining: 37.4ms
169:	learn: 0.0021777	total: 205ms	remaining: 36.2ms
170:	learn: 0.0021421	total: 206ms	remaining: 35ms
171:	learn: 0.0021023	total: 208ms	remaining: 33.8ms
172:	learn: 0.0020732	total: 209ms	remaining: 32.6ms
173:	learn: 0.0020376	total: 210ms	remaining: 31.4ms
174:	learn: 0.0020070	total: 212ms	remaining: 30.2ms
175:	learn: 0.0019800	total: 213ms	remaining: 29ms
176:	learn: 0.0019505	total: 214ms	remaining: 27.8ms
177:	learn: 0.0019204	total: 215ms	remaining: 26.6ms
178:	learn: 0.0018917	total: 217ms	remaining: 25.4ms
179:	learn: 0.0018650	total: 218ms	remaining: 24.2ms
180:	learn: 0.0018381	total: 219ms	remaining: 23ms
181:	learn: 0.0018168	total: 221ms	remaining: 21.8ms
182:	learn: 0.0017910	total: 222ms	remaining: 20.6ms
183:	learn: 0.0017680	total: 223ms	remaining: 19.4ms
184:	learn: 0.0017472	total: 224ms	remaining: 18.2ms
185:	learn: 0.0017226	total: 226ms	remaining: 17ms
186:	learn: 0.0016991	total: 227ms	remaining: 15.8ms
187:	learn: 0.0016752	total: 228ms	remaining: 14.6ms
188:	learn: 0.0016481	total: 230ms	remaining: 13.4ms
189:	learn: 0.0016252	total: 231ms	remaining: 12.2ms
190:	learn: 0.0016045	total: 232ms	remaining: 10.9ms
191:	learn: 0.0015870	total: 234ms	remaining: 9.74ms
192:	learn: 0.0015656	total: 235ms	remaining: 8.52ms
193:	learn: 0.0015454	total: 236ms	remaining: 7.31ms
194:	learn: 0.0015228	total: 237ms	remaining: 6.09ms
195:	learn: 0.0015058	total: 239ms	remaining: 4.87ms
196:	learn: 0.0014873	total: 240ms	remaining: 3.65ms
197:	learn: 0.0014695	total: 241ms	remaining: 2.44ms
198:	learn: 0.0014515	total: 243ms	remaining: 1.22ms
199:	learn: 0.0014340	total: 244ms	remaining: 0us
Out[385]:
{'depth': 3, 'iterations': 200, 'learning_rate': 0.01}
In [387]:
catb_cv_model.best_params_
Out[387]:
{'depth': 3, 'iterations': 200, 'learning_rate': 0.01}
In [389]:
catb = CatBoostClassifier(iterations = 200, 
                          learning_rate = 0.01, 
                          depth = 8)

catb_tuned = catb.fit(X_train, y_train)
y_pred = catb_tuned.predict(X_test)
0:	learn: 0.6674337	total: 3.68ms	remaining: 732ms
1:	learn: 0.6386795	total: 5.65ms	remaining: 560ms
2:	learn: 0.6116826	total: 8.82ms	remaining: 579ms
3:	learn: 0.5815681	total: 10.3ms	remaining: 506ms
4:	learn: 0.5605616	total: 15.4ms	remaining: 602ms
5:	learn: 0.5327449	total: 21.4ms	remaining: 693ms
6:	learn: 0.5095491	total: 25.3ms	remaining: 696ms
7:	learn: 0.4884327	total: 30ms	remaining: 719ms
8:	learn: 0.4610596	total: 31.4ms	remaining: 666ms
9:	learn: 0.4432132	total: 36.2ms	remaining: 687ms
10:	learn: 0.4201282	total: 37.7ms	remaining: 648ms
11:	learn: 0.4051588	total: 41.2ms	remaining: 645ms
12:	learn: 0.3856329	total: 43.3ms	remaining: 622ms
13:	learn: 0.3648093	total: 44.8ms	remaining: 595ms
14:	learn: 0.3500280	total: 48.8ms	remaining: 602ms
15:	learn: 0.3348621	total: 51.2ms	remaining: 589ms
16:	learn: 0.3203382	total: 54.4ms	remaining: 586ms
17:	learn: 0.3073090	total: 57.8ms	remaining: 584ms
18:	learn: 0.2958863	total: 61.2ms	remaining: 583ms
19:	learn: 0.2840642	total: 64.4ms	remaining: 580ms
20:	learn: 0.2740001	total: 67.6ms	remaining: 577ms
21:	learn: 0.2631915	total: 71ms	remaining: 575ms
22:	learn: 0.2533232	total: 74.4ms	remaining: 573ms
23:	learn: 0.2441702	total: 77.9ms	remaining: 571ms
24:	learn: 0.2356840	total: 81.1ms	remaining: 568ms
25:	learn: 0.2273163	total: 84.5ms	remaining: 565ms
26:	learn: 0.2181152	total: 87.6ms	remaining: 561ms
27:	learn: 0.2112030	total: 90.7ms	remaining: 557ms
28:	learn: 0.2040427	total: 94ms	remaining: 554ms
29:	learn: 0.1940757	total: 95.4ms	remaining: 541ms
30:	learn: 0.1867837	total: 98.6ms	remaining: 537ms
31:	learn: 0.1809468	total: 102ms	remaining: 534ms
32:	learn: 0.1749376	total: 105ms	remaining: 530ms
33:	learn: 0.1693432	total: 108ms	remaining: 527ms
34:	learn: 0.1625870	total: 110ms	remaining: 518ms
35:	learn: 0.1573514	total: 113ms	remaining: 514ms
36:	learn: 0.1520892	total: 116ms	remaining: 511ms
37:	learn: 0.1450636	total: 117ms	remaining: 500ms
38:	learn: 0.1409237	total: 121ms	remaining: 497ms
39:	learn: 0.1357986	total: 123ms	remaining: 493ms
40:	learn: 0.1312019	total: 126ms	remaining: 490ms
41:	learn: 0.1263063	total: 129ms	remaining: 487ms
42:	learn: 0.1219785	total: 133ms	remaining: 484ms
43:	learn: 0.1175381	total: 134ms	remaining: 477ms
44:	learn: 0.1112586	total: 136ms	remaining: 467ms
45:	learn: 0.1061615	total: 137ms	remaining: 458ms
46:	learn: 0.1034262	total: 140ms	remaining: 455ms
47:	learn: 0.0993954	total: 143ms	remaining: 452ms
48:	learn: 0.0955029	total: 144ms	remaining: 445ms
49:	learn: 0.0925824	total: 148ms	remaining: 443ms
50:	learn: 0.0905601	total: 151ms	remaining: 440ms
51:	learn: 0.0874680	total: 153ms	remaining: 435ms
52:	learn: 0.0853901	total: 156ms	remaining: 433ms
53:	learn: 0.0819556	total: 158ms	remaining: 427ms
54:	learn: 0.0787584	total: 161ms	remaining: 424ms
55:	learn: 0.0766212	total: 164ms	remaining: 422ms
56:	learn: 0.0742543	total: 167ms	remaining: 419ms
57:	learn: 0.0720224	total: 170ms	remaining: 416ms
58:	learn: 0.0697375	total: 173ms	remaining: 414ms
59:	learn: 0.0677118	total: 176ms	remaining: 411ms
60:	learn: 0.0660644	total: 179ms	remaining: 409ms
61:	learn: 0.0639365	total: 182ms	remaining: 406ms
62:	learn: 0.0623641	total: 186ms	remaining: 404ms
63:	learn: 0.0608367	total: 189ms	remaining: 401ms
64:	learn: 0.0588202	total: 192ms	remaining: 398ms
65:	learn: 0.0574737	total: 195ms	remaining: 396ms
66:	learn: 0.0563396	total: 198ms	remaining: 393ms
67:	learn: 0.0548423	total: 201ms	remaining: 390ms
68:	learn: 0.0532666	total: 203ms	remaining: 386ms
69:	learn: 0.0515519	total: 207ms	remaining: 384ms
70:	learn: 0.0502591	total: 210ms	remaining: 381ms
71:	learn: 0.0490098	total: 213ms	remaining: 379ms
72:	learn: 0.0467149	total: 214ms	remaining: 373ms
73:	learn: 0.0457303	total: 218ms	remaining: 371ms
74:	learn: 0.0445577	total: 221ms	remaining: 368ms
75:	learn: 0.0432739	total: 224ms	remaining: 366ms
76:	learn: 0.0423363	total: 228ms	remaining: 364ms
77:	learn: 0.0415304	total: 231ms	remaining: 361ms
78:	learn: 0.0404405	total: 234ms	remaining: 358ms
79:	learn: 0.0393911	total: 237ms	remaining: 355ms
80:	learn: 0.0384185	total: 240ms	remaining: 352ms
81:	learn: 0.0372283	total: 243ms	remaining: 350ms
82:	learn: 0.0365177	total: 246ms	remaining: 347ms
83:	learn: 0.0355646	total: 249ms	remaining: 344ms
84:	learn: 0.0347002	total: 252ms	remaining: 341ms
85:	learn: 0.0337786	total: 255ms	remaining: 338ms
86:	learn: 0.0329459	total: 258ms	remaining: 335ms
87:	learn: 0.0318274	total: 260ms	remaining: 331ms
88:	learn: 0.0311323	total: 263ms	remaining: 328ms
89:	learn: 0.0303998	total: 266ms	remaining: 325ms
90:	learn: 0.0297805	total: 269ms	remaining: 322ms
91:	learn: 0.0289198	total: 271ms	remaining: 318ms
92:	learn: 0.0282585	total: 274ms	remaining: 315ms
93:	learn: 0.0276928	total: 277ms	remaining: 312ms
94:	learn: 0.0271543	total: 280ms	remaining: 310ms
95:	learn: 0.0262780	total: 282ms	remaining: 305ms
96:	learn: 0.0256569	total: 284ms	remaining: 302ms
97:	learn: 0.0250403	total: 288ms	remaining: 299ms
98:	learn: 0.0244318	total: 291ms	remaining: 296ms
99:	learn: 0.0238450	total: 293ms	remaining: 293ms
100:	learn: 0.0232284	total: 296ms	remaining: 290ms
101:	learn: 0.0227745	total: 299ms	remaining: 287ms
102:	learn: 0.0224504	total: 302ms	remaining: 284ms
103:	learn: 0.0219963	total: 305ms	remaining: 282ms
104:	learn: 0.0216729	total: 308ms	remaining: 279ms
105:	learn: 0.0211069	total: 310ms	remaining: 275ms
106:	learn: 0.0207598	total: 313ms	remaining: 272ms
107:	learn: 0.0204186	total: 316ms	remaining: 269ms
108:	learn: 0.0200790	total: 319ms	remaining: 266ms
109:	learn: 0.0197391	total: 322ms	remaining: 264ms
110:	learn: 0.0194175	total: 326ms	remaining: 261ms
111:	learn: 0.0191088	total: 329ms	remaining: 258ms
112:	learn: 0.0184258	total: 330ms	remaining: 254ms
113:	learn: 0.0181038	total: 334ms	remaining: 252ms
114:	learn: 0.0177560	total: 337ms	remaining: 249ms
115:	learn: 0.0174523	total: 340ms	remaining: 246ms
116:	learn: 0.0170665	total: 344ms	remaining: 244ms
117:	learn: 0.0167080	total: 347ms	remaining: 241ms
118:	learn: 0.0164368	total: 350ms	remaining: 238ms
119:	learn: 0.0161758	total: 353ms	remaining: 236ms
120:	learn: 0.0159163	total: 357ms	remaining: 233ms
121:	learn: 0.0156155	total: 360ms	remaining: 230ms
122:	learn: 0.0153268	total: 363ms	remaining: 227ms
123:	learn: 0.0148085	total: 364ms	remaining: 223ms
124:	learn: 0.0146222	total: 367ms	remaining: 220ms
125:	learn: 0.0143929	total: 371ms	remaining: 218ms
126:	learn: 0.0141229	total: 374ms	remaining: 215ms
127:	learn: 0.0139366	total: 377ms	remaining: 212ms
128:	learn: 0.0137235	total: 380ms	remaining: 209ms
129:	learn: 0.0135058	total: 384ms	remaining: 207ms
130:	learn: 0.0132373	total: 387ms	remaining: 204ms
131:	learn: 0.0130216	total: 390ms	remaining: 201ms
132:	learn: 0.0128189	total: 393ms	remaining: 198ms
133:	learn: 0.0126473	total: 397ms	remaining: 195ms
134:	learn: 0.0124232	total: 400ms	remaining: 193ms
135:	learn: 0.0121647	total: 404ms	remaining: 190ms
136:	learn: 0.0120279	total: 407ms	remaining: 187ms
137:	learn: 0.0117731	total: 410ms	remaining: 184ms
138:	learn: 0.0115865	total: 413ms	remaining: 181ms
139:	learn: 0.0113978	total: 417ms	remaining: 179ms
140:	learn: 0.0112161	total: 420ms	remaining: 176ms
141:	learn: 0.0110848	total: 424ms	remaining: 173ms
142:	learn: 0.0109318	total: 427ms	remaining: 170ms
143:	learn: 0.0106073	total: 429ms	remaining: 167ms
144:	learn: 0.0102736	total: 430ms	remaining: 163ms
145:	learn: 0.0101610	total: 433ms	remaining: 160ms
146:	learn: 0.0099851	total: 437ms	remaining: 157ms
147:	learn: 0.0098476	total: 440ms	remaining: 155ms
148:	learn: 0.0097119	total: 443ms	remaining: 152ms
149:	learn: 0.0094119	total: 445ms	remaining: 148ms
150:	learn: 0.0092983	total: 448ms	remaining: 145ms
151:	learn: 0.0091583	total: 451ms	remaining: 142ms
152:	learn: 0.0090601	total: 454ms	remaining: 139ms
153:	learn: 0.0089563	total: 457ms	remaining: 137ms
154:	learn: 0.0088347	total: 460ms	remaining: 134ms
155:	learn: 0.0087051	total: 463ms	remaining: 131ms
156:	learn: 0.0086108	total: 466ms	remaining: 128ms
157:	learn: 0.0084889	total: 469ms	remaining: 125ms
158:	learn: 0.0083861	total: 472ms	remaining: 122ms
159:	learn: 0.0082546	total: 475ms	remaining: 119ms
160:	learn: 0.0081076	total: 477ms	remaining: 115ms
161:	learn: 0.0079875	total: 480ms	remaining: 113ms
162:	learn: 0.0078931	total: 483ms	remaining: 110ms
163:	learn: 0.0077882	total: 486ms	remaining: 107ms
164:	learn: 0.0077308	total: 489ms	remaining: 104ms
165:	learn: 0.0076229	total: 492ms	remaining: 101ms
166:	learn: 0.0075257	total: 495ms	remaining: 97.8ms
167:	learn: 0.0074029	total: 498ms	remaining: 94.9ms
168:	learn: 0.0072770	total: 500ms	remaining: 91.6ms
169:	learn: 0.0071588	total: 503ms	remaining: 88.7ms
170:	learn: 0.0070895	total: 506ms	remaining: 85.8ms
171:	learn: 0.0070303	total: 509ms	remaining: 82.9ms
172:	learn: 0.0069656	total: 512ms	remaining: 79.9ms
173:	learn: 0.0068813	total: 515ms	remaining: 77ms
174:	learn: 0.0068259	total: 518ms	remaining: 74ms
175:	learn: 0.0067628	total: 521ms	remaining: 71.1ms
176:	learn: 0.0066664	total: 524ms	remaining: 68.1ms
177:	learn: 0.0065542	total: 527ms	remaining: 65.1ms
178:	learn: 0.0065009	total: 530ms	remaining: 62.1ms
179:	learn: 0.0064268	total: 533ms	remaining: 59.2ms
180:	learn: 0.0063803	total: 536ms	remaining: 56.2ms
181:	learn: 0.0062920	total: 539ms	remaining: 53.3ms
182:	learn: 0.0062361	total: 542ms	remaining: 50.3ms
183:	learn: 0.0061654	total: 545ms	remaining: 47.4ms
184:	learn: 0.0060935	total: 548ms	remaining: 44.4ms
185:	learn: 0.0060179	total: 551ms	remaining: 41.5ms
186:	learn: 0.0059646	total: 554ms	remaining: 38.5ms
187:	learn: 0.0058999	total: 557ms	remaining: 35.6ms
188:	learn: 0.0058299	total: 560ms	remaining: 32.6ms
189:	learn: 0.0057614	total: 563ms	remaining: 29.7ms
190:	learn: 0.0056890	total: 566ms	remaining: 26.7ms
191:	learn: 0.0056322	total: 569ms	remaining: 23.7ms
192:	learn: 0.0055756	total: 573ms	remaining: 20.8ms
193:	learn: 0.0055162	total: 576ms	remaining: 17.8ms
194:	learn: 0.0054634	total: 579ms	remaining: 14.8ms
195:	learn: 0.0054088	total: 582ms	remaining: 11.9ms
196:	learn: 0.0053461	total: 585ms	remaining: 8.91ms
197:	learn: 0.0053016	total: 588ms	remaining: 5.94ms
198:	learn: 0.0052433	total: 591ms	remaining: 2.97ms
199:	learn: 0.0051680	total: 595ms	remaining: 0us
In [391]:
y_pred = catb_tuned.predict(X_test)
accuracy_score(y_test, y_pred)
Out[391]:
1.0

Tüm Modellerin Karşılaştırılması¶

In [493]:
modeller = [
    knn_tuned,
    loj_model,
    nb_model,
    mlpc_tuned,
    cart_tuned,
    rf_tuned,
    gbm_tuned,
    catb_tuned,
    lgbm_tuned,
    xgb_tuned
    
]


for model in modeller:
    if(model!=mlpc_tuned):
        y_pred = model.predict(X_test)
    else:
        y_pred = model.predict(X_test_scaled)
        
    dogruluk = accuracy_score(y_test, y_pred)
    isimler = model.__class__.__name__
    print("-"*30)
    print(isimler + ":" )
    print("Accuracy: {:.4%}".format(dogruluk))
------------------------------
KNeighborsClassifier:
Accuracy: 61.0461%
------------------------------
LogisticRegression:
Accuracy: 62.6854%
------------------------------
GaussianNB:
Accuracy: 76.8150%
------------------------------
MLPClassifier:
Accuracy: 100.0000%
------------------------------
DecisionTreeClassifier:
Accuracy: 100.0000%
------------------------------
RandomForestClassifier:
Accuracy: 99.7658%
------------------------------
GradientBoostingClassifier:
Accuracy: 100.0000%
------------------------------
CatBoostClassifier:
Accuracy: 100.0000%
------------------------------
LGBMClassifier:
Accuracy: 100.0000%
------------------------------
XGBClassifier:
Accuracy: 100.0000%
In [501]:
sonuclar = pd.DataFrame(columns=["modeller", "Accuracy"])

for model in modeller:
    if model != mlpc_tuned:
        y_pred = model.predict(X_test)
    else:
        y_pred = model.predict(X_test_scaled)

    dogruluk = accuracy_score(y_test, y_pred)
    isimler = model.__class__.__name__
    sonuc = pd.DataFrame([[isimler, dogruluk * 100]], columns=["modeller", "Accuracy"])
    sonuclar = pd.concat([sonuclar, sonuc], ignore_index=True)

sns.barplot(x='Accuracy', y='modeller', data=sonuclar, color="r")
plt.xlabel('Accuracy %')
plt.title('Modellerin Doğruluk Oranları')
plt.show()
No description has been provided for this image

Modellerin Kaydedilmesi ve Tekrar Kullanılması¶

In [ ]:
# Save Model Using Pickle
import pandas
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
import pickle
#uzak sunucudan datanin cekilmesi
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pandas.read_csv(url, names=names)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]
test_size = 0.33
seed = 7
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y, test_size=test_size, random_state=seed)
# Fit the model on training set
model = LogisticRegression()
model.fit(X_train, Y_train)
In [ ]:
# save the model to disk
filename = 'data/finalized_model.sav'
pickle.dump(model, open(filename, 'wb'))
In [ ]:
# load the model from disk
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test, Y_test)
print(result)
In [ ]: